Semaphore Deadlock

This problem has become very frustrating. This is supposed to be a simple program that creates shared memory, then 9 readers and 3 writers of the shared memory counter field. There is a reader semaphore and a writer semaphore, so multiple readers can be in the critical section at once. The program works fine as-is on my quad-core computer, but on dual-core machines it seems to deadlock, and I can't for the life of me spot the source of the problem. It only seems to deadlock when there are too many writers, so I suspect the do_writer function (starting at line 153), though it seems so simple. If anyone can spot it or suggest another problem, I would be very grateful. BTW, you can tell that it has deadlocked if there is no output on the command line, in case anyone actually runs it.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
#include <csignal>
#include <cstdio>
#include <cstdlib>
#include <iostream>

#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

// the following are for semaphores -----
#include <sys/sem.h>
#include <sys/ipc.h>

// the following are for shared memory ----
#include <sys/shm.h>

#define NUM_REPEAT		1000		// number of loops for testing
#define SEM_KEY			8712		// the semaphore key
#define SHM_KEY			5512		// the shared memory key
#define NUM_READERS		9		// number of reader processes
#define NUM_WRITERS		3		// number of writer processes
#define NUM_SEMAPHORES	        4		// number of semaphores
#define MAX_READER_SEQ	        3		// number of readers to allow
                                                //   before checking for queued
                                                //   writers
// Index of each semaphore inside the semaphore set; every entry guards one
// field of the shared my_mem block.
enum semaphores {
	SEM_COUNTER_WRITER = 0,	// guards my_mem->counter (held by a writer, or by the reader group)
	SEM_COUNTER_READER = 1,	// guards my_mem->concurrent_reader
	SEM_WRITER_TOTAL = 2,	// guards my_mem->writer_total
	SEM_READER_TOTAL = 3	// guards my_mem->reader_total
};

// shared memory definition ----
// Layout of the shared memory segment that every reader and writer attaches.
struct my_mem {
	long counter;		// the value writers increment and readers read
	int concurrent_reader;	// readers currently inside the read section
	int writer_total;	// writers that have registered and not yet finished
	int reader_total;	// readers that have registered and not yet finished
};

// Process bodies. Arguments: the semaphore set ID, a scratch sembuf used to
// build semop() requests, and the attached shared memory block.
void do_writer(int, struct sembuf*, struct my_mem*);
void do_reader(int, struct sembuf*, struct my_mem*);
// Semaphore helpers. Arguments: the semaphore set ID, a scratch sembuf, and
// the index of the semaphore to operate on. signal() applies +1, wait()
// applies -1; both return semop()'s result (0 on success).
int signal(int, struct sembuf*, int);
int wait(int, struct sembuf*, int);

int main(void) {
	pid_t process_id = getpid();

	int sem_id;			// the semaphore ID
	struct sembuf operations[1];	// define semaphore operations

	int shm_id;			// the shared memory ID
	int shm_size;			// the size of the shared memory
	struct my_mem* p_shm;	        // pointer to the attached shared memory

	// Semaphore control data structure ----
	union semun {
		int val;
		struct semid_ds *buf;
		ushort * arry;
	} argument;
	argument.val = 1; // the initial value of the semaphore

	// create a new semaphore set -----
	sem_id = semget(SEM_KEY, NUM_SEMAPHORES, 0666 | IPC_CREAT);
	if (sem_id < 0) {
		fprintf(stderr, "Failed to create a new semaphore. Terminating ..\n");
		exit(0);
	}

	// initialize the new semaphores to 1 ----
	for (int i = 0; i < NUM_SEMAPHORES; i++) {
		if (semctl(sem_id, i, SETVAL, argument) < 0) {
			fprintf(stderr,	"Failed to initialize the semaphore to 1. Terminating...\n");
			exit(0);
		}
	}

	// find the shared memory size in bytes ----
	shm_size = sizeof(my_mem);
	if (shm_size <= 0) {
		fprintf(stderr, "sizeof error in acquiring the shared memory size. Terminating...\n");
		exit(0);
	}

	// create a shared memory ----
	shm_id = shmget(SHM_KEY, shm_size, 0666 | IPC_CREAT);
	if (shm_id < 0) {
		fprintf(stderr, "Failed to create the shared memory. Terminating...\n");
		exit(0);
	}

	// attach the new shared memory ----
	p_shm = (struct my_mem *) shmat(shm_id, NULL, 0);
	if (p_shm == (struct my_mem*) -1) {
		fprintf(stderr, "Failed to attach the shared memory. Terminating...\n");
		exit(0);
	}

	// initialize the shared memory ----
	p_shm->counter = 0;
	p_shm->concurrent_reader = 0;
	p_shm->writer_total = 0;
	p_shm->reader_total = 0;

	// spawn child processes ----
	for (int i = 0; process_id && i < NUM_READERS + NUM_WRITERS - 1; i++) {
		process_id = fork();
	}

	// The child processes ----
	if (process_id == 0) {
		// get number of writers ----
		wait(sem_id, operations, SEM_WRITER_TOTAL);
		int num_writers = p_shm->writer_total;
		signal(sem_id, operations, SEM_WRITER_TOTAL);

		// get number of readers ----
		wait(sem_id, operations, SEM_READER_TOTAL);
		int num_readers = p_shm->reader_total;
		signal(sem_id, operations, SEM_READER_TOTAL);

		if (num_writers < NUM_WRITERS - 1) {
			do_writer(sem_id, operations, p_shm);
			exit(0);
		} else if (num_readers < NUM_READERS) {
			do_reader(sem_id, operations, p_shm);
			exit(0);
		}
	}
	// The parent process ----
	else {
		do_writer(sem_id, operations, p_shm);
		// wait for all readers & writers to complete ----
		for (int i = 0; p_shm->writer_total > 0 || p_shm->reader_total > 0; i++)
			;
		printf("Shared Memory Counter: %ld\n", p_shm->counter);
	}
	return 0;
}

void do_writer(int sem_id, struct sembuf* operations, struct my_mem* p_shm) {
	int writer_id;

	// increment number of writers ----
	wait(sem_id, operations, SEM_WRITER_TOTAL);
	writer_id = p_shm->writer_total + 1;
	p_shm->writer_total++;
	signal(sem_id, operations, SEM_WRITER_TOTAL);

	// wait for all readers & writers ----
	for (int i = 0; p_shm->reader_total < NUM_READERS || p_shm->writer_total < NUM_WRITERS; i++)
		;

	long microseconds;
	for (int i = 0; i < NUM_REPEAT; i++) {
		// generate random number of microseconds to sleep (10,000 - 90,000 us)
		microseconds = (long)(10000.0 * (1 + (10.0 * (rand() / (RAND_MAX + 1.0)))));
		wait(sem_id, operations, SEM_COUNTER_WRITER);
		// critical section ----
		printf("W%d is entering the critical section.\n", writer_id);
		p_shm->counter++;
		usleep(microseconds);
		printf("W%d is leaving the critical section.\n", writer_id);
		signal(sem_id, operations, SEM_COUNTER_WRITER);
	}

	// declare completion ----
	wait(sem_id, operations, SEM_WRITER_TOTAL);
	p_shm->writer_total--;
	signal(sem_id, operations, SEM_WRITER_TOTAL);
}

void do_reader(int sem_id, struct sembuf* operations, struct my_mem* p_shm) {
	int reader_id;

	// increment number of readers ----
	wait(sem_id, operations, SEM_READER_TOTAL);
	reader_id = p_shm->reader_total + 1;
	p_shm->reader_total++;
	signal(sem_id, operations, SEM_READER_TOTAL);

	// wait for all readers & writers ----
	for (int i = 0; p_shm->reader_total < NUM_READERS || p_shm->writer_total < NUM_WRITERS; i++)
		;

	int counter_val;
	bool check_writers = false;
	for (int i = 0; p_shm->writer_total > 0; i++) {
		// give writers a chance ----
		if (i % MAX_READER_SEQ == 0) {
			check_writers = true;
		}

		if (!check_writers) {
			// reader critical section ----
			wait(sem_id, operations, SEM_COUNTER_READER);
			p_shm->concurrent_reader++;
			if (p_shm->concurrent_reader == 1) {
				wait(sem_id, operations, SEM_COUNTER_WRITER);
			}
			signal(sem_id, operations, SEM_COUNTER_READER);

			// critical section ----
			printf("R%d is entering the critical section.\n", reader_id);
			counter_val = p_shm->counter;
			printf("R%d is leaving the critical section.\n", reader_id);
			wait(sem_id, operations, SEM_COUNTER_READER);

			// reader critical section ----
			p_shm->concurrent_reader--;
			if (p_shm->concurrent_reader == 0) {
				signal(sem_id, operations, SEM_COUNTER_WRITER);
			}
			signal(sem_id, operations, SEM_COUNTER_READER);
		} else if (p_shm->concurrent_reader == 0) {
			check_writers = false;
		}
	}

	// declare completion ----
	wait(sem_id, operations, SEM_READER_TOTAL);
	p_shm->reader_total--;
	signal(sem_id, operations, SEM_READER_TOTAL);
}

// V operation (signal): adds 1 to semaphore `sem_num` of set `sem_id`.
// `operations` is overwritten with the request passed to semop().
// Returns semop()'s result: 0 on success; on failure a message is written to
// stderr and the non-zero result is returned.
int signal(int sem_id, struct sembuf* operations, int sem_num) {
	// build the single "+1" request ----
	operations[0].sem_num = sem_num;
	operations[0].sem_op = +1;
	operations[0].sem_flg = 0;

	const int status = semop(sem_id, operations, 1);
	if (status == 0) {
		return status;
	}

	fprintf(stderr, "V-OP (signal) failed on set %d, semaphore %d...\a\n", sem_id, sem_num);
	return status;
}

// P operation (wait): subtracts 1 from semaphore `sem_num` of set `sem_id`.
// `operations` is overwritten with the request passed to semop().
// Returns semop()'s result: 0 on success; on failure a message is written to
// stderr and the non-zero result is returned.
int wait(int sem_id, struct sembuf* operations, int sem_num) {
	// build the single "-1" request ----
	operations[0].sem_num = sem_num;
	operations[0].sem_op = -1;
	operations[0].sem_flg = 0;

	const int status = semop(sem_id, operations, 1);
	if (status == 0) {
		return status;
	}

	fprintf(stderr, "P-OP (wait) failed on set %d, semaphore %d...\a\n", sem_id, sem_num);
	return status;
}

Have you tested the value for microseconds?
Any chance it may exceed 1,000,000 on different systems?

When I ran it in the debugger, I set microseconds to 1000001 and it stopped updating.
It continued to run, but nothing changed. The random number being generated is between 10K and 110K.
When I restarted, it was back to normal (R's and W's).

I also changed printf to cout and fprintf to cerr.
It was odd, it was outputting to debugger, but not the console.

Yeah, I found the same thing; it works running in GDB, but not when executing it outright. I also commented out the random number sleep at one point and it made no difference.
Topic archived. No new replies allowed.