283 lines
8.3 KiB
C++
283 lines
8.3 KiB
C++
|
|
/*
|
|
* Josh Holtrop
|
|
* 2008-10-01
|
|
* Grand Valley State University
|
|
* CS677
|
|
* Programming Assignment #2
|
|
*/
|
|
|
|
#include <pthread.h>
#include <sys/time.h> /* gettimeofday(), struct timeval */

#include <algorithm>  /* max() */
#include <climits>    /* INT_MAX */
#include <cstddef>    /* size_t */
#include <cstdlib>    /* exit(), strtol() */
#include <cstring>    /* strcmp() */
#include <fstream>
#include <iostream>
#include <vector>

using namespace std;
|
|
|
|
/*
 * eq() reports whether two sequence characters are considered equal.
 * A '?' on either side acts as a wildcard that matches any character.
 * This is a function rather than a macro so that each argument is evaluated
 * exactly once and is type-checked as a char.
 * Parameters:
 *  x : IN : character from the first sequence
 *  y : IN : character from the second sequence
 * Returns true when x == y or when either of them is '?'.
 */
static inline bool eq(char x, char y)
{
    return (x == y) || (x == '?') || (y == '?');
}
|
|
|
|
/* Forward declarations of the functions defined further down in this file */
void usage(char * prog);
bool readFile(char * fileName, std::vector<char> & v);
void * calcSimMatrixThread(void * arg);

/* State shared between main() and the worker threads */
pthread_barrier_t barrier;  /* rendezvous point used by the worker threads */
int num_threads;            /* number of worker threads (set in main()) */
int * matrix;               /* similarity matrix storage, allocated in main() */
std::vector<char> * s;      /* first input sequence (main() points it at files[0]) */
std::vector<char> * t;      /* second input sequence (main() points it at files[1]) */
|
|
|
|
/* Result record handed back by a worker thread: the largest matrix value
 * it stored and the coordinates of that value */
struct retval_t
{
    int max_val;    /* largest value found */
    int max_i;      /* row index of max_val */
    int max_j;      /* column index of max_val */
};

/* File-scope instance; main() and calcSimMatrixThread() each declare a
 * local of the same name, which hides this one inside those functions */
retval_t retval;
|
|
|
|
/*
 * usage() writes the command-line synopsis to standard output and then
 * terminates the process with exit status 42; it never returns.
 * Parameters:
 *  prog : IN : the program name to show in the synopsis (argv[0])
 */
void usage(char * prog)
{
    std::ostream & out = std::cout;
    out << "Usage: " << prog;
    out << " [-n <num_threads>] <file1> <file2>";
    out << std::endl;
    exit(42);
}
|
|
|
|
/*
 * taskAllocate() splits total_tasks tasks among total_workers workers so
 * that the group sizes differ by at most one.  The first
 * (total_tasks % total_workers) workers each take one extra task, and the
 * groups are handed out as consecutive ranges in worker-id order.
 * Parameters:
 *  total_tasks   : IN  : how many tasks there are to share out
 *  total_workers : IN  : how many workers share them (must be > 0)
 *  this_id       : IN  : id (base 0) of the worker asking for its share
 *  first_task_id : OUT : id (base 0) of the first task given to this worker
 *  num           : OUT : how many tasks this worker was given
 */
inline void taskAllocate(int total_tasks, int total_workers, int this_id,
                         int * first_task_id, int * num)
{
    const int base_share = total_tasks / total_workers;
    const int extras = total_tasks % total_workers;    /* tasks left after an even split */
    const bool takes_extra = (this_id < extras);
    const int share = takes_extra ? base_share + 1 : base_share;

    /* Workers that take an extra task are packed back to back from task 0;
     * the remaining workers start after all of the extra tasks. */
    *first_task_id = takes_extra ? share * this_id
                                 : share * this_id + extras;
    *num = share;
}
|
|
|
|
int main(int argc, char * argv[])
|
|
{
|
|
vector<char> files[2];
|
|
num_threads = 1;
|
|
int file_to_read = 0;
|
|
|
|
/* Process command-line arguments */
|
|
for (int i = 1; i < argc; i++)
|
|
{
|
|
if ( ! strcmp("-n", argv[i]) )
|
|
{
|
|
if (i == argc - 1)
|
|
usage(argv[0]);
|
|
i++;
|
|
num_threads = atoi(argv[i]);
|
|
}
|
|
else
|
|
{
|
|
if (file_to_read < 2)
|
|
readFile(argv[i], files[file_to_read]);
|
|
else
|
|
usage(argv[0]);
|
|
file_to_read++;
|
|
}
|
|
}
|
|
|
|
if (file_to_read != 2)
|
|
usage(argv[0]);
|
|
|
|
s = &files[0];
|
|
t = &files[1];
|
|
pthread_t * threads = new pthread_t[num_threads];
|
|
matrix = new int[(files[0].size() + 1) * (files[1].size() + 1)];
|
|
pthread_barrier_init(&barrier, NULL, num_threads);
|
|
|
|
struct timeval before, after;
|
|
gettimeofday(&before, NULL); /* Start timing */
|
|
|
|
/* Create num_threads worker threads */
|
|
for (int i = 0; i < num_threads; i++)
|
|
{
|
|
int * arg = new int;
|
|
*arg = i;
|
|
int ret = pthread_create(&threads[i], NULL, &calcSimMatrixThread, arg);
|
|
if (ret)
|
|
{
|
|
cerr << "Error " << ret << " when creating thread!" << endl;
|
|
return -4;
|
|
}
|
|
}
|
|
|
|
/* Wait for the worker threads to exit and accumulate their results */
|
|
int max_val = 0, max_i = 0, max_j = 0;
|
|
for (int i = 0; i < num_threads; i++)
|
|
{
|
|
struct retval_t * retval;
|
|
pthread_join(threads[i], (void **) &retval);
|
|
if (retval->max_val == max_val)
|
|
{
|
|
if ( (retval->max_i + retval->max_j) > (max_i + max_j) )
|
|
{
|
|
max_i = retval->max_i;
|
|
max_j = retval->max_j;
|
|
}
|
|
}
|
|
else if (retval->max_val > max_val)
|
|
{
|
|
max_val = retval->max_val;
|
|
max_i = retval->max_i;
|
|
max_j = retval->max_j;
|
|
}
|
|
delete retval;
|
|
}
|
|
|
|
/* Print the maximum value and position */
|
|
cout << "Maximum value is " << max_val << " at position ("
|
|
<< max_i << ", " << max_j << ")" << endl;
|
|
|
|
gettimeofday(&after, NULL); /* Stop timing */
|
|
double time_before = before.tv_sec + before.tv_usec / 1000000.0;
|
|
double time_after = after.tv_sec + after.tv_usec / 1000000.0;
|
|
double diff = time_after - time_before;
|
|
cout << "Elapsed time: " << diff << " seconds." << endl;
|
|
|
|
/* Clean up after ourselves */
|
|
pthread_barrier_destroy(&barrier);
|
|
delete[] matrix;
|
|
delete[] threads;
|
|
return 0;
|
|
}
|
|
|
|
/*
 * readFile() appends every non-whitespace character of a file to a vector.
 * Formatted extraction (operator>>) is used, so spaces, tabs and newlines
 * are skipped.
 * Parameters:
 *  fileName : IN  : path of the file to read
 *  v        : OUT : receives the characters, appended in file order
 * Returns false if the file could not be opened or if the stream reported
 * an unrecoverable read error; true otherwise.
 */
bool readFile(char * fileName, std::vector<char> & v)
{
    std::ifstream in(fileName);
    if (!in.is_open())
        return false;

    /* Stop on ANY failed extraction, not only on end-of-file: a read error
     * sets badbit/failbit without eofbit, and looping until eof() alone
     * would then never terminate. */
    char chr;
    while (in >> chr)
        v.push_back(chr);

    return !in.bad();
}
|
|
|
|
/* Compute portions of the similarity matrix between two character arrays */
|
|
void * calcSimMatrixThread(void * arg)
|
|
{
|
|
int * realarg = (int *) arg;
|
|
int id = *realarg;
|
|
int s_size = s->size();
|
|
int t_size = t->size();
|
|
int last_step = s_size + t_size;
|
|
/* Create F as a pointer to a two-dimensional array of size
|
|
* s_size+1 X t_size+1
|
|
* This allows us to keep the similarity matrix in a stored area
|
|
* but still to access it using two-dimensional array syntax so the
|
|
* compiler does the math for us of calculating the offsets into
|
|
* the array based on s_size and t_size
|
|
*/
|
|
int (*F)[s_size+1][t_size+1] = (int (*) [s_size+1][t_size+1]) matrix;
|
|
int max_i = 0, max_j = 0, max_val = 0;
|
|
int first_task_id, num_tasks;
|
|
taskAllocate(t_size+1, num_threads, id, &first_task_id, &num_tasks);
|
|
for (int i = 0, idx = first_task_id; i < num_tasks; i++, idx++)
|
|
(*F)[0][idx] = 0; /* set first row to 0's */
|
|
taskAllocate(s_size+1, num_threads, id, &first_task_id, &num_tasks);
|
|
for (int i = 0, idx = first_task_id; i < num_tasks; i++, idx++)
|
|
(*F)[idx][0] = 0; /* set first column to 0's */
|
|
pthread_barrier_wait(&barrier); /* Wait for all threads to finish */
|
|
for (int step = 2; step <= last_step; step++)
|
|
{
|
|
int first_i = step - 1;
|
|
int first_j = 1;
|
|
int last_i = 1;
|
|
int last_j = step - 1;
|
|
if (first_i > s_size) /* Adjust if past bottom of matrix */
|
|
{
|
|
first_j += (first_i - s_size);
|
|
first_i = s_size;
|
|
}
|
|
if (last_j > t_size) /* Adjust if past right of matrix */
|
|
{
|
|
last_i += (last_j - t_size);
|
|
last_j = t_size;
|
|
}
|
|
num_tasks = (last_j - first_j) + 1; /* first through last inclusive */
|
|
taskAllocate(num_tasks, num_threads, id, &first_task_id, &num_tasks);
|
|
for (int i = first_i - first_task_id, /* this thread starting i */
|
|
j = first_j + first_task_id, /* this thread starting j */
|
|
task_id = 0;
|
|
task_id < num_tasks;
|
|
i--, j++, task_id++) /* loop diagonally num_tasks times */
|
|
{
|
|
/* Compute the value for the matrix */
|
|
(*F)[i][j] =
|
|
max(
|
|
max(
|
|
(*F)[i][j-1] - 2,
|
|
(*F)[i-1][j-1] +
|
|
(eq(s->at(i-1), t->at(j-1)) ? 1 : -1)
|
|
),
|
|
max(
|
|
(*F)[i-1][j] - 2,
|
|
0
|
|
)
|
|
);
|
|
/* See if we found a new maximum value */
|
|
if ((*F)[i][j] > max_val)
|
|
{
|
|
max_val = (*F)[i][j];
|
|
max_i = i;
|
|
max_j = j;
|
|
}
|
|
else if ((*F)[i][j] == max_val)
|
|
{
|
|
/* If we found a value the same as our current maximum
|
|
* value, see if it has a greater i+j value */
|
|
if ( (i + j) > (max_i + max_j) )
|
|
{
|
|
max_i = i;
|
|
max_j = j;
|
|
}
|
|
}
|
|
}
|
|
/* Wait for all threads to proceed to the next step together */
|
|
pthread_barrier_wait(&barrier);
|
|
}
|
|
|
|
#if 0
|
|
cout << "Matrix: " << s_size+1 << " x " << t_size+1 << endl;
|
|
for (int i = 0; i <= s_size; i++)
|
|
{
|
|
for (int j = 0; j <= t_size; j++)
|
|
{
|
|
printf("%2d ", (*F)[i][j]);
|
|
}
|
|
printf("\n");
|
|
}
|
|
#endif
|
|
|
|
struct retval_t * retval = new struct retval_t;
|
|
retval->max_val = max_val;
|
|
retval->max_i = max_i;
|
|
retval->max_j = max_j;
|
|
|
|
delete realarg;
|
|
return retval;
|
|
}
|