Incorrect count output / Having difficulty trying to create a HashTable/Set using open addressing

I'm trying to create a program that opens a .txt file containing a speech and assigns each word to a space in the array/set based on the hash value. Collisions are accounted for using open addressing method. The program should be able to perform the following functions: add(), remove(), find(), count() which keeps count of the elements IN the array/set, and loadfactor(). A template header.h file was provided that required some filling in, but my unfamiliarity with that style of coding was making it difficult for me to understand it. Below I have provided the code I have so far, and everything seems to be working **except** the mCount. The speech contains about 300 words but when I run the code, the count output shows 17. I'm assuming the error is in my resizing function but I am unsure.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
//hashset.h file
#pragma once

#include <functional>
#include <vector>

template <typename TValue, typename TFunc>
class HashSet
{
private:
	// Unlike Java, the C++ hashtable array won't be full of null references.
	// It will be full of "Entry" objects, each of which tracks its current state
	// as EMPTY, FULL (with a value), or NIL (value was once here, but was removed).
	class Entry
	{
	public:
		enum EntryState { EMPTY, FULL, NIL };

		TValue value;
		EntryState state;

		Entry() : value(), state(EMPTY) {}
		Entry(const TValue& v) : value(v), state(EMPTY) {}
	};

	TFunc mHash;
	std::vector<Entry> mTable;
	std::size_t mCount;

public:
	// Constructs a hashtable with the given size, using the given function for
	// computing h(k).
	// hash must be a callable object (function, functor, etc.) that takes a parameter
	//    of type TValue and returns std::size_t (an integer).
	HashSet(int size, TFunc hash) : mHash(hash)
	{
		// hashtable array cannot be same data type as that of what i sbeing stored (cannot be string) 
		// requirement #4 - if user inputs array size that is not a power of 2, constructor must round to nearest power of 2 value

		/******************* my edit*****************/
		size = pow(2, (int(log(size - 1) / log(2)) + 1));
		/********************************************/

		mTable.resize(size); // resizes the vector to have given size.
							 // Each element will be default-constructed to have state EMPTY.
	}

	/******************* my edit*****************/
	//int mHash(const TValue& hash) {
		//size_t hash_value = hash % size;
		//return hash_value;
	//}
	/********************************************/

	HashSet::resize(int new_size) {
		HashSet aux{ new_size, mHash }; //double the size, same hash function
		for (const auto& entry : mTable)
			if (entry.state == Entry::FULL) //there is an element
				aux.add(entry.value); //insert it on the new set
		*this = aux;
	}

	// Inserts the given value into the set.
	void add(const TValue& value)
	{
		// Use the type std::size_t for working with hash table indices.
		// Invoke the mHash function, passing the key to calculate h(k), as in

		// size_t hashCode = mHash(value);

		// Mod down to size.
		// Go to the table at that index and do the insertion routine.

		/******************* my edit*****************/
		// Note, if table is full when trying to add an element, it should double in size 
		// to keep table size a power of 2
		size_t hashCode = mHash(value) % mTable.size(); // mod value by table size to get starting index
		
		if (double(mCount) / mTable.size() > 0.8) // load factor
			this->resize(2 * mTable.size()); // call resize function if array is too small to accommodate addition 

		if (mTable[hashCode].state == Entry::EMPTY)
			mTable[hashCode].value = value; // store value in vector index specified by hashCode

		else {
			for (std::size_t i = 1; i < mTable.size(); i++) {
				// use open addressing to find next open space
				if (mTable[hashCode].state == Entry::EMPTY) {
					hashCode = (hashCode + ((i ^ 2) + i) >> 2) % mTable.size(); // h(k) + f(i) or h(k) + ((i^2 + i)) / 2
				}

				else
					break; // exit for-loop
			}
			mTable[hashCode].value = value; // store value in vector index specified by new hashCode
		}
		/********************************************/
	}

	// Returns true if the given value is present in the set.
	bool find(const TValue& key)
	{
		/******************* my edit*****************/
		size_t hashCode = mHash(key) % mTable.size(); // mod value by table size to get starting index to do retrace
		if (mTable[hashCode].value == key)
			return true;

		else if (mTable[hashCode].state != Entry::EMPTY || mTable[hashCode].state == Entry::NIL) { // getting error NIL identifier not found
			for (std::size_t i = 1; i < mTable.size(); i++) {
				// use open addressing again to find key
				if (mTable[hashCode].value != key) {
					hashCode = (hashCode + ((i ^ 2) + i) >> 2) % mTable.size();
				}

				else
					break; // exit for-loop
			}
			return true; // value found at speecified location
		}

		else
			return false; // return false when hash index == NULL
		/********************************************/
	}

	// Removes the given value from the set.
	void remove(const TValue& key)
	{
		/******************* my edit*****************/
		size_t hashCode = mHash(key) % mTable.size(); // mod value by table size to get starting index to do retrace
		if (mTable[hashCode].value == key)
			mTable[hashCode].status = Entry::NIL; // replace value with NIL so find() op does not return a false when searching for element 

		else if (mTable[hashCode].status != Entry::EMPTY || mTable[hashcode].state == Entry::NIL) {
			for (std::size_t i = 1; i < mTable.size(); i++) {
				// use open addressing again to find key
				if (mTable[hashCode].value != key) {
					hashCode = (hashCode + ((i ^ 2) + i) >> 2) % mTable.size(); // h(k) + f(i) or h(k) + ((i^2 + i)) / 2
				}

				else
					mTable[hashCode].status = Entry::NIL; // if found after open addressing, replace with NIL
			}
		}

		else
			cout << "Element is not in the list and cannot be removed" << endl; // element not in list
		/********************************************/
	}
};
// main file - no template provided, entirely my code
#include "hashset.h"
#include <fstream>
#include <iostream>
#include <string>
using namespace std;
int main()
{
    const int input = 50; // must initially start at 50

    ifstream src("speech.txt");
    vector<string> Words;

    if (!src)
    {
        cout << "Error opening the file" << endl;
        exit(1);
    }
    string word;
    while (src >> word)
    {
        // TO DO remove unwanted characters from word
        cout << word << endl;  //Just for testing
        //Words.push_back(word);
    }

    HashSet<size_t, string> obj1(input, word);

    for (int i = 0; i < word.size(); i++) {
        // place each word in individual table space
    }

    system("pause");

    exit(0);
}
Last edited on
^ is not power but xor operation
pow(2, n) may be wrote as 1<<n

1
2
3
4
5
6
	HashSet(int size, TFunc hash) : mHash(hash)
	{
		//...
		int hash_value = hash % size;
		return hash_value;
	}
¿what are you trying to do there? `hash' is a function, ¿what do you mean to do with hash%size?
also, a constructor constructs an object, there is nothing to return

you said that you wanted to store words (strings), so you need a function to map from std::string to size_t (the hash function)
HashSet<std::string, std::hash<std::string> > word_set{ 42, std::hash<std::string>{} };
that will construct your set using the hash function provided in std


1
2
3
4
5
void add(const TValue& value)
	{
		size_t hashCode = mHash(value); // mod value by table size to get starting index
		if (mTable[hashCode] == NULL)
			mTable[hashCode] = value; // store value in vector index specified by hashCode 

you didn't mod the code size_t hashCode = mHash(value) % mTable.size();
each element of `mTable' is an `Entry'. An `Entry' has two members: the `value' that you want to store and a `state' flag that tells you that indeed have something stored
so you need to do
1
2
if (mTable[hashCode].state == Entry::EMPTY or mTable[hashCode].state == Entry::NIL) //may use the space
   mTable[hashCode].value = value; //store the element 



1
2
3
4
5
6
7
8
9
10
11
12
13
14
		else {
			for (std::size_t i = 1; i < mTable.size(); i++) {
				// use open addressing to find next open space
				if (mTable[hashCode] != NULL) {
					hashCode = (hashCode + ((i ^ 2) + i) >> 2) % mTable.size(); // h(k) + f(i) or h(k) + ((i^2 + i)) / 2
				}

				else if (i == mTable.size()) // Table is full, resize table
					HashSet((2* mTable.size()), value);
				else
					break; // exit for-loop
			}
			mTable[hashCode] = value; // store value in vector index specified by new hashCode
		}
there are repeated mistakes, see above. also, >>2 is not the same as /2 (don't know if the formula is correct)
you have the `mCount' variable to know when the table is full (remember to update it), may test that at the beginning of the function
1
2
3
void add(const TValue& value){
	if (double(mCount)/mTable.size() > 0.8) //load factor
		this->resize(2*mTable.size());
to do the resize() you need to rehash all the elements
1
2
3
4
5
6
7
HashSet::resize(int new_size){
	HashSet aux {new_size, mHash}; //double the size, same hash function
	for(const auto &entry: mTable)
		if(entry.state == Entry::FULL) //there is an element
			aux.add(entry.value); //insert it on the new set
	*this = aux; //¿do you understand what happens here? ¿how may you make it more efficient?
}


similar mistakes in the find() and remove() functions, you'll need to compare mTable[hashCode].value == key
note that you need to write Entry::NIL, Entry::FULL, Entry::EMPTY
also, in the else if lines 96--107 you always return true


main()
1
2
3
4
5
6
7
8
9
10
    string word;
    while (src >> word)
    {
       //...
    }
    //¿what value has `word' here?

    for (int i = 0; i < word.size(); i++) { //¿traversing the characters in the string?
        HashSet obj1(input, word[i]); //¿how many sets do you plan to create?
    }

1
2
3
4
5
6
HashSet<std::string, std::hash<std::string> > word_set{ 50, std::hash<std::string>{} };
string word;
while (src>>word)
   word_set.add(word);
//word_set has all the words in the file
//you may now search or remove elements 

I have made edits above with your suggestions. If you could go through it to ensure that I'm still heading towards the right direction that would be appreciated. One thing that I'm definitely unclear on is what you're suggested code is doing in main(). Also, I am getting a compile error with the resize function you provided.
Last edited on
> Also, I am getting a compile error with the resize function you provided.
when getting an error, provide the error message
also, would appreciate if you update in another post instead of touching the op.
1
2
//HashSet::resize(int new_size) {
void resize(int new_size) {
forgot the return type, and the HashSet:: preamble is used when defining member functions outside the class (there I was just saying «this is a member function»)


in `add()'
you may insert new elements in positions that are not already taken. Those are marked with EMPTY or NIL
however, you need to be careful of not inserting the same element twice on the set
in pseudocode
1
2
3
4
5
6
7
8
9
10
11
12
add(value):
	if find(value): //already there, do nothing
		return
	hash_code = hash(value) % size
	//look for a free space
	for K in range(0, size):
		position = (hash_code + K) % size //adapt to your collision algorithm
		if table[position].state in {empty, nil}:
			table[position].value = value
			table[position].state = full //now this cell does have an element
			break
	count += 1 //we keep track of how many elements there are in the set 


mCount holds the number of elements in your set. It should be initialised to 0 in the HashSet constructor and updated on the add() and remove() operations

in your find() function you have
1
2
3
4
		else if (mTable[hashCode].state != Entry::EMPTY || mTable[hashCode].state == Entry::NIL) { //line 108
			//loop traversing the collisions...
			return true; //line 118
		}
if you enter that if, you always return true
when traversing the collisions, if you find an empty cell you should return false.
so there are three breaking conditions:
- you found the element, return true
- you reach an empty cell, return false
- you traversed the full table, return false
1
2
3
4
5
6
7
8
9
find(value):
	hash_code = hash(value) % size
	for K in range(0, size):
		position = (hash_code + K) % size
		if table[position].state == empty:
			return false
		if table[position].state == full and table[position].value == value:
			return true
	return false



> One thing that I'm definitely unclear on is what you're suggested code is doing in main()
ok, let's start over
you have a text file and want to analyse the words used on it. For this you'll insert all those words into a set
1
2
3
4
//pseudocode
HashSet word_set
while read(word):
	word_set.insert(word)

you defined your HashSet class like this
1
2
template <typename TValue, typename TFunc>
class HashSet
it is a template with two parameters
the first parameter indicates the type of the element stored in the set, so you may have a set of integers, a set of strings, a set of animals, whatever
the second parameter is the hash function, which maps each value into a position on your table, for example
1
2
3
size_t hash_string(string s){
	return s.size();
}
that will hash the strings according to their length (expect a lot of collisions)
I don't know what hash function are you expected to use, so suggested std::hash

to create your set then
1
2
3
4
5
6
7
HashSet< //want a hash set
	std::string, //that stores strings
	std::hash<std::string> //using std::hash for the hash function
> word_set{ //variable name
	50, //starts with a capacity of 50
	std::hash<std::string>{} //and uses std::hash
};
now that you have a set you can operate on it
1
2
3
4
5
6
7
8
9
word_set.add("hello");
word_set.add("brave");
word_set.add("new");
word_set.add("world");
word_set.remove("brave");
if (word_set.find("hello"))
	cout << "nice to meet you\n";
else
	cout << "die heretic\n";

Last edited on
1
2
3
4
		size_t hashCode = mHash(value) % mTable.size(); // mod value by table size to get starting index
		
		if (double(mCount) / mTable.size() > 0.8) // load factor
			this->resize(2 * mTable.size()); // call resize function if array is too small to accommodate addition  

If you resize, then the computed hashCode becomes invalid. Compute hashCode after you resize, not before.

1
2
3
4
5
6
7
8
9
			for (std::size_t i = 1; i < mTable.size(); i++) {
				// use open addressing to find next open space
				if (mTable[hashCode].state == Entry::EMPTY) {
					hashCode = (hashCode + ((i ^ 2) + i) >> 2) % mTable.size(); // h(k) + f(i) or h(k) + ((i^2 + i)) / 2
				}

				else
					break; // exit for-loop
			}

So you break out of the loop when you discover a cell that isn't empty. That doesn't make sense. Also, shouldn't you check for cells that are NIL? I guess I don't understand the purpose of NIL cells. They seem to become unusable.

add(), find() and remove() all share a big chunk of common logic: finding the index where the value is, or where it should go. You should create a method that does that. This will also fix a bug in add: it's currently possible to insert the same value multiple times.

Below are the edits I've made. I was able to run the compiler with these edits and I tested the adding, removing and finding functions and they appear to be working properly.

constructor
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
public:
	// Constructs a hashtable with the given size, using the given function for
	// computing h(k).
	// hash must be a callable object (function, functor, etc.) that takes a parameter
	//    of type TValue and returns std::size_t (an integer).
	HashSet(int size, TFunc hash) : mHash(hash)
	{
		mCount = 0; // holds count of the number of items in the set
		// hashtable array cannot be same data type as that of what i sbeing stored (cannot be string) 
		// requirement #4 - if user inputs array size that is not a power of 2, constructor must round to nearest power of 2 value

		/******************* my edit*****************/
		size = pow(2, (int(log(size - 1) / log(2)) + 1));
		/********************************************/

		mTable.resize(size); // resizes the vector to have given size.
							 // Each element will be default-constructed to have state EMPTY.
	}

add() and resize()
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
void resize(int new_size) {
		HashSet aux{ new_size, mHash }; //double the size, same hash function
		for (const auto& entry : mTable)
			if (entry.state == Entry::FULL) //there is an element
				aux.add(entry.value); //insert it on the new set
		*this = aux;
	}

	// Inserts the given value into the set.
	void add(const TValue& value)
	{
		// Note, if table is full when trying to add an element, it should double in size 
		// to keep table size a power of 2
		
		if (double(mCount) / mTable.size() > 0.8) // load factor
			this->resize(2 * mTable.size()); // call resize function if array is too small to accommodate addition 

		size_t hashCode = mHash(value) % mTable.size(); // mod value by table size to get starting index

		if (mTable[hashCode].state == Entry::EMPTY || mTable[hashCode].state == Entry::NIL) // NIL space CAN be replaced with value
			mTable[hashCode].value = value; // store value in vector index specified by hashCode

		else {
			for (std::size_t i = 1; i < mTable.size(); i++) {
				// use open addressing to find next open space
				if (mTable[hashCode].state != Entry::EMPTY) {
					hashCode = ((mHash(value) % mTable.size()) + ((int)(pow(i, 2) + i) >> 1)) % mTable.size(); // h(k) + f(i) or h(k) + ((i^2 + i)) / 2
				}

				else if (mTable[hashCode].value == value) { // account for duplicates 
					break; // exit for-loop
				}

				else if (mTable[hashCode].state == Entry::EMPTY || mTable[hashCode].state == Entry::NIL) { // NIL space CAN be replaced with value
					mTable[hashCode].value = value; // store value in vector index specified by new hashCode
					mCount++; // increment counter when word is added
					break; // exit for-loop
				}

				else
					break; // exit for-loop
			} 
		}
	}

find()
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
bool find(const TValue& key){
                size_t hashCode = mHash(key) % mTable.size(); // mod value by table size to get starting index to do retrace
		if (mTable[hashCode].value == key)
			return true;

		else if (mTable[hashCode].state != Entry::EMPTY || mTable[hashCode].state == Entry::NIL) {
			for (std::size_t i = 1; i < mTable.size(); i++) {
				// use open addressing again to find key
				if (mTable[hashCode].value != key)
					hashCode = ((mHash(key) % mTable.size()) + ((int)(pow(i, 2) + i) >> 1)) % mTable.size();

				else if (mTable[hashCode].value == key) {
					return true; // value found at speecified location
					break; // exit for-loop as first instance of value has been found
				}

				else if (i == mTable.size()) // end of table reached
					return false;
			}
		}

		else
			return false; // return false when hash index == NULL
	}

remove()
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
void remove(const TValue& key){
                size_t hashCode = mHash(key) % mTable.size(); // mod value by table size to get starting index to do retrace
		if (mTable[hashCode].value == key)
			mTable[hashCode].value = Entry::NIL; // replace value with NIL so find() op does not return a false when searching for element 

		else if (mTable[hashCode].state != Entry::EMPTY || mTable[hashCode].state == Entry::NIL) {
			for (std::size_t i = 1; i < mTable.size(); i++) {
				// use open addressing again to find key
				if (mTable[hashCode].value != key) {
					hashCode = ((mHash(key) % mTable.size()) + ((int)(pow(i, 2) + i) >> 1)) % mTable.size();
				}

				else {
					mTable[hashCode].value = Entry::NIL; // if found after open addressing, replace with NIL
					mCount--; // decrement element counter
				}
			}
		}
}

main()
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
int main()
{
    ifstream src("speech.txt");
    vector<string> Words;

    if (!src)
    {
        cout << "Error opening the file" << endl;
        exit(1);
    }

    // HashSet<size_t, string> obj1(50, word);
    HashSet<std::string, std::hash<std::string> > obj1{ 50, std::hash<std::string>{} };

    string word;
    while (src >> word)
    {
        obj1.add(word);
        cout << word << endl;  //Just for testing
    }
    
    obj1.add("hello");
    if (obj1.find("hello"))
        cout << " hello is in the set " << endl;
    else
        cout << " hello is not in set " << endl;

    obj1.add("time11");
    if (obj1.find("time11"))
        cout << " time11 is in the set " << endl;
    else
        cout << "time11 is not in set " << endl;

    obj1.add("done11");
    if (obj1.find("done11"))
        cout << " done11 is in the set " << endl;
    else
        cout << "done11 is not in set " << endl;

    obj1.remove("done11");
    if (obj1.find("done11"))
        cout << " done11 is in the set " << endl;
    else
        cout << "done11 is not in set " << endl;

    if (obj1.find("time11"))
        cout << " time11 is in the set " << endl;
    else
        cout << "time11 is not in set " << endl;

    system("pause");
	exit(0);
}
Last edited on
> getting error NIL identifier not found
¿do you still get that error or just didn't update the comment?

you should create tests for your functions
a print() method that shows all FULL entries may help
(would be nice if you could test the collision algorithm too)
Yes, that was my mistake. I forgot to comment that out
Add and remove need to update the bucket's state.

i ^ 2 does not compute i2. It computes i xor 2. Use i*i instead.

If add() replaces an existing value, then you shouldn't increment mCount.
After reading a up on open address hashing, it appears that the NIL items are there to avoid invalidating a cluster of sequential items. See the wikipedia page, especially note 2 on the pseudo-code: https://en.wikipedia.org/wiki/Open_addressing

hashCode = (hashCode + ((i ^ 2) + i) >> 2) % mTable.size(); // h(k) + f(i) or h(k) + ((i^2 + i)) / 2
The code doesn't match the comment. >> 2 divides by 4, not 2.

You really should create a private method that finds the slot where the item is, or where it should go if inserted:
1
2
3
4
    // Return the slot where val is, or where it should go
    unsigned find_slot(const TValue &val) {
        // Your code here
    }

Once you have this, add, find and remove are trivial:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
    // Inserts the given value into the set.
    void add(const TValue & value)
    {
        size_t slot = find_slot(value);
        if (mTable[slot].state == Entry::EMPTY) ++mCount;
        mTable[slot].value = value;
        mTable[slot].state = Entry::FULL;
    }

    bool find(const TValue & key)
    {
        size_t slot = find_slot(key);
        return (mTable[slot].state == Entry::FULL);
    }

    void remove(const TValue & key)
    {
        size_t slot = find_slot(key);
        if (mTable[slot].state == Entry::FULL) {
            mTable[slot].state = Entry::NIL;
            --mCount;
        }
    }


I was able to fix the issue I was having, thank you for your help

.h file
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
#pragma once

#include <iostream>

#include <cmath>
#include <functional>
#include <vector>

using namespace std;

template <typename TValue, typename TFunc>
class HashSet
{
private:
	// Unlike Java, the C++ hashtable array won't be full of null references.
	// It will be full of "Entry" objects, each of which tracks its current state
	// as EMPTY, FULL (with a value), or NIL (value was once here, but was removed).
	class Entry
	{
	public:
		enum EntryState { EMPTY, FULL, NIL };

		TValue value;
		EntryState state;

		Entry() : value(), state(EMPTY) {}
		Entry(const TValue& v) : value(v), state(EMPTY) {}
	};

	TFunc mHash;
	std::vector<Entry> mTable;
	std::size_t mCount;

public:
	// Constructs a hashtable with the given size, using the given function for
	// computing h(k).
	// hash must be a callable object (function, functor, etc.) that takes a parameter
	//    of type TValue and returns std::size_t (an integer).
	HashSet(int size, TFunc hash) : mHash(hash)
	{
		// initialize count
		mCount = 0;

		// hashtable array cannot be same data type as that of what is being stored (cannot be string) 
		// requirement #4 - if user inputs array size that is not a power of 2, constructor must round to nearest power of 2 value
		size = pow(2, ceil(log(size) / log(2)));
		mTable.resize(size); // resizes the vector to have given size.
							 // Each element will be default-constructed to have state EMPTY.
	}

	void resize(int new_size) {
		HashSet aux{ new_size, mHash }; //double the size, same hash function
		for (const auto& entry : mTable) {
			if (entry.state == Entry::FULL) //there is an element
				aux.add(entry.value); //insert it on the new set
		}

		mTable = aux.mTable;
		/*********testing***********************************
		std::cout << "+++++++++++++++++++++++++++++++++++" << endl;
		std::cout << "New Table size: " << mTable.size() << endl;
		std::cout << "+++++++++++++++++++++++++++++++++++" << endl;
		*/
	}

	// Inserts the given value into the set.
	void add(const TValue& value)
	{
		// Use the type std::size_t for working with hash table indices.
		// Invoke the mHash function, passing the key to calculate h(k), as in

		// size_t hashCode = mHash(value);

		// Mod down to size.
		// Go to the table at that index and do the insertion routine.

		// Note, if table is full when trying to add an element, it should double in size 
		// to keep table size a power of 2

		if (double(mCount) / mTable.size() > 0.8) // load factor comparison
			this->resize(2 * mTable.size()); // call resize function if array is too small to accommodate addition 

		size_t hashCode = mHash(value) % mTable.size(); // mod value by table size to get starting index

		if (mTable[hashCode].state == Entry::EMPTY || mTable[hashCode].state == Entry::NIL) { // NIL space CAN be replaced with value
			mTable[hashCode].value = value; // store value in vector index specified by hashCode
			mTable[hashCode].state = Entry::FULL; // update flag
			mCount++; // increment counter when word is added
		}

		else {
			for (std::size_t i = 0; i < mTable.size(); i++) {
				// use open addressing to find next open space
				hashCode = ((mHash(value) % mTable.size()) + ((int)(pow(i, 2) + i) / 2)) % mTable.size(); // h(k) + f(i) or h(k) + ((i^2 + i)) / 2

				if (mTable[hashCode].value == value) { // account for duplicates 
					break; // exit for-loop
				}
				else if (mTable[hashCode].state == Entry::EMPTY || mTable[hashCode].state == Entry::NIL) { // NIL space CAN be replaced with value
					mTable[hashCode].value = value; // store value in vector index specified by new hashCode
					mTable[hashCode].state = Entry::FULL; // update flag
					mCount++; // increment counter when word is added
					break; // exit for-loop
				}
			}
		}
	}

	// Returns true if the given value is present in the set.
	bool find(const TValue& key)
	{
		size_t hashCode = mHash(key) % mTable.size(); // mod value by table size to get starting index to do retrace
		if (mTable[hashCode].value == key)
			return true;

		else if (mTable[hashCode].state != Entry::EMPTY || mTable[hashCode].state == Entry::NIL) { // check that set is not empty or has a NIL state
			for (std::size_t i = 1; i < mTable.size(); i++) {
				// use open addressing again to find key
				if (mTable[hashCode].value != key)
					hashCode = ((mHash(key) % mTable.size()) + ((int)(pow(i, 2) + i) >> 1)) % mTable.size();

				else if (mTable[hashCode].value == key) {
					return true; // value found at speecified location
					break; // exit for-loop as first instance of value has been found
				}

				//else if (i == mTable.size()) // end of table reached, element not in set
					//return false;
			}
		}

		else // end of table reached, element was not in set
			return false;
	}

	// Removes the given value from the set.
	void remove(const TValue& key)
	{
		size_t hashCode = mHash(key) % mTable.size(); // mod value by table size to get starting index to do retrace
		if (mTable[hashCode].value == key) {
			mTable[hashCode].value = Entry::NIL; // replace value with NIL so find() op does not return a false when searching for element 
			mTable[hashCode].state = Entry::NIL; // update flag
			mCount--; // decrement element counter
		}

		else if (mTable[hashCode].state != Entry::EMPTY || mTable[hashCode].state != Entry::NIL) { // check that there is a value to be removed
			for (std::size_t i = 1; i < mTable.size(); i++) {
				// use open addressing again to find key
				if (mTable[hashCode].value != key) {
					hashCode = ((mHash(key) % mTable.size()) + ((int)(pow(i, 2) + i) >> 1)) % mTable.size();
				}

				else {
					mTable[hashCode].value = Entry::NIL; // if found after open addressing, replace with NIL
					mTable[hashCode].state = Entry::NIL; // update flag
					mCount--; // decrement element counter
				}
			}
		}
	}

	int count() {
		return mCount;
	}

	double loadFactor() {
		double a = double(mCount) / mTable.size();
		return a;
	}

	void print() {
		for (int i = 0; i < mTable.size(); i++) {
			Entry e = mTable[i];

			if (e.state == Entry::FULL)
				std::cout << e.value << ", ";
		}

		std::cout << endl;
	}
};

main()
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
#include "hashset.h"
#include <cmath>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
#include <algorithm>
using namespace std;

int main()
{
    string testline;
    vector<string> word;

    HashSet<std::string, std::hash<std::string> > obj1{ 50, std::hash<std::string>{} };

    ifstream Test("speech.txt");

    if (!Test)
    {
        cout << "There was an error opening the file.\n";
        return 0;
    }

    //store words in vector
    while (Test >> testline) {
        // remove all non-alphanumeric characters
        testline.erase(std::remove(testline.begin(), testline.end(), ','), testline.end());
        testline.erase(std::remove(testline.begin(), testline.end(), ':'), testline.end());
        testline.erase(std::remove(testline.begin(), testline.end(), '?'), testline.end());
        testline.erase(std::remove(testline.begin(), testline.end(), ';'), testline.end());
        testline.erase(std::remove(testline.begin(), testline.end(), '.'), testline.end());

        std::cout << "WORD = \"" << testline << "\"\n";
        if (testline == "—")
            continue;

        if (!testline.empty())
            word.push_back(testline);
    }

    // continuously add modified word to hast
    for (int i = 0; i < word.size(); i++) {
        obj1.add(word[i]);
    }

    obj1.print();
    cout << "Total number of distinct words: " << obj1.count() << endl << endl;
    obj1.add("political");
    obj1.add("generation");
    cout << "find() returns: " << obj1.find("generation") << endl << endl;
    obj1.remove("try");
    obj1.remove("thing");
    obj1.print();
    cout << "Total number of distinct words: " << obj1.count() << endl << endl;
    cout << "find() returns: " << obj1.find("thing") << endl;
}
Last edited on
The problem is in resize:
if (entry.state == Entry::FULL && entry.state == Entry::EMPTY && entry.state == Entry::NIL)
When is the state FULL and EMPTY?

You only want to insert items where the entry is FULL.
also you are missing mTable[hashCode].state = Entry::FULL; in your add() function


> If add() replaces an existing value, then you shouldn't increment mCount.
add() never replaces, if the entry is EMPTY or NIL, the there is no element.
Topic archived. No new replies allowed.