From 446b8464b3338bb752a32374153f793c33048105 Mon Sep 17 00:00:00 2001 From: josh Date: Sat, 1 Nov 2008 16:56:50 +0000 Subject: [PATCH] updating hw.tex, added src from hw7 git-svn-id: svn://anubis/gvsu@227 45c1a28c-8058-47b2-ae61-ca45b979098e --- cs677/hw5/hw.tex | 63 +++++---------- cs677/hw5/src/Makefile | 17 ++++ cs677/hw5/src/floyd-parallel.cc | 126 ++++++++++++++++++++++++++++++ cs677/hw5/src/floyd-sequential.cc | 125 +++++++++++++++++++++++++++++ cs677/hw5/src/gen_adj_matrix.c | 39 +++++++++ cs677/hw5/src/time-for-n.pl | 14 ++++ 6 files changed, 341 insertions(+), 43 deletions(-) create mode 100644 cs677/hw5/src/Makefile create mode 100644 cs677/hw5/src/floyd-parallel.cc create mode 100644 cs677/hw5/src/floyd-sequential.cc create mode 100644 cs677/hw5/src/gen_adj_matrix.c create mode 100755 cs677/hw5/src/time-for-n.pl diff --git a/cs677/hw5/hw.tex b/cs677/hw5/hw.tex index 5416898..8a7ad08 100644 --- a/cs677/hw5/hw.tex +++ b/cs677/hw5/hw.tex @@ -12,8 +12,8 @@ \renewcommand{\headrulewidth}{0pt} \renewcommand{\footrulewidth}{0pt} \fancyhf{} -\lhead{HW Chap. 7\\\ \\\ } -\rhead{Josh Holtrop\\2008-10-15\\CS 677} +\lhead{HW Chap. 5\\\ \\\ } +\rhead{Josh Holtrop\\2008-11-05\\CS 677} \rfoot{\thepage} \begin{document} @@ -21,54 +21,31 @@ \noindent \begin{enumerate} \item[1.]{ - Break the ``parallel region'' into a function accepting a \texttt{void *} - parameter. - Before the ``parallel region'' create a \texttt{for} loop which loops - \textit{n} times (where \textit{n} is the number of threads), - invoking \texttt{pthread\_create()} once for each thread. - Any variables local to the function containing the ``parallel region'' - that the ``parallel region'' function needs access to - would have to be stored as pointers in a structure whose address was - passed as an argument to the thread function. - Then, the thread would run the code in the ``parallel region''. - After the region, a \texttt{for} loop would exist to loop over all - the threads created in the first loop and execute \texttt{pthread\_join()} - for each one. + The best known sequential sorting algorithms have a complexity of $O (n \log n)$. + So, the speedup factor is given by + $$ s = \frac{T_s}{T_p} = \frac{n \log n}{cn} = \frac{\log n}{c} $$ } -\vskip 2em \item[2.]{ - Each thread could store its result into an array indexed by its ID. - Then, when computation is complete, a regular \texttt{for} loop - within an OpenMP parallel region could iterate - $\lceil \log_2 n \rceil$ times. - In the first iteration, threads where $ID\mod 2 = 0$ would perform - the reduction operation on their array value and the array value - at index $ID + 1$ while the rest of the threads are idle. - In the second iteration, threads where $ID\mod 4 = 0$ would perform - the reduction operation on their array value and the array value - at index $ID + 2$ while the rest of the threads are idle. - This process would repeat (doubling the mod value and offset index - each time) until the reduction operation has been - performed to produce the final result value at index 0 of the - array. + The total processing time when the program is run on $p$ processors + will be given by the initialization phase plus the compute phase + divided by $p$ processors. + So, the speedup is given by + $$ s = \frac{T_s}{T_p} = \frac{n + n^3}{n + \frac{n^3}{p}} $$ } -\vskip 2em \item[3.]{ - My OpenMP solution to Floyd's algorithm was implemented by - using a \texttt{\#pragma omp parallel for} on the second \texttt{for} - loop of the algorithm. - Thus, for each $k$ value, the rows are broken up for different - threads to process. - The same thread computes an entire row of the matrix. + Using Amdahl's law, the maximum speedup is $1/f$, where $f$ is the + serial fraction of execution time. + So, the maximum fraction of execution time a program can spend on + serial code if the parallel version must achieve a speedup + factor of 10 is 10\%. +} - The run times nicely grow exponentially as $n$ grows linearly. - On eos24, with $n >= 400$, the speedup was $\approx 3.6$. - - As the number of threads increased, the run time decreased - exponentially until $t > 4$, where more threads did not gain - anything since there were only 4 processing cores. +\vskip 1em +\item[4.]{ + Using Gustafson's law, the scaled speedup factor is given by + $$ S_G = p + (1 - p) T_s = 8 + (1 - 8) \frac{1}{24} = 7.708 $$ } \end{enumerate} diff --git a/cs677/hw5/src/Makefile b/cs677/hw5/src/Makefile new file mode 100644 index 0000000..bd5a190 --- /dev/null +++ b/cs677/hw5/src/Makefile @@ -0,0 +1,17 @@ + +TARGETS := gen_adj_matrix +TARGETS += floyd-sequential +TARGETS += floyd-parallel + +CXXFLAGS := -fopenmp +#CXXFLAGS += -DPRINT_RESULT + +OBJS := $(foreach target,$(TARGETS),$(target).o) + +all: $(TARGETS) + +$(TARGETS): $(OBJS) + $(CXX) -o $@ $@.o $(CXXFLAGS) + +clean: + -rm -f *~ *.o $(TARGETS) diff --git a/cs677/hw5/src/floyd-parallel.cc b/cs677/hw5/src/floyd-parallel.cc new file mode 100644 index 0000000..1b169ca --- /dev/null +++ b/cs677/hw5/src/floyd-parallel.cc @@ -0,0 +1,126 @@ + +/* Josh Holtrop + * 2008-10-15 + * CS 677 + * Grand Valley State University + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include /* gettimeofday(), struct timeval */ +using namespace std; + +void usage(char * progname); +int readFile(char * fileName, vector & v); +void convertToMatrix(const vector & v, int num_verts, int * vals); + +void usage(char * progname) +{ + cout << "Usage: " << progname << " " << endl; + exit(42); +} + +int main(int argc, char * argv[]) +{ + if (argc < 1) + usage(argv[0]); + + vector v; + + int num_verts = readFile(argv[1], v); + int D[2][num_verts][num_verts]; + convertToMatrix(v, num_verts, (int *) &D[0]); + + struct timeval before, after; + gettimeofday(&before, NULL); /* Start timing */ + + /* Run Floyd's Algorithm on D */ + for (int k = 1; k <= num_verts; k++) + { +#pragma omp parallel for + for (int i = 0; i < num_verts; i++) + { + for (int j = 0; j < num_verts; j++) + { + int distWithoutK = D[(k-1) & 1][i][j]; + int distItoK = D[(k-1) & 1][i][k-1]; + int distKtoJ = D[(k-1) & 1][k-1][j]; + int distWithK = + (distItoK == INT_MAX || distKtoJ == INT_MAX) + ? INT_MAX + : distItoK + distKtoJ; + D[k & 1][i][j] = min( + distWithoutK, + distWithK + ); + } + } + } + + gettimeofday(&after, NULL); /* Stop timing */ + +#ifdef PRINT_RESULT + cout << "Result:" << endl; + /* Print out the final matrix */ + for (int i = 0; i < num_verts; i++) + { + for (int j = 0; j < num_verts; j++) + { + if (D[num_verts & 1][i][j] == INT_MAX) + printf("-- "); + else + printf("%2d ", D[num_verts & 1][i][j]); + } + printf("\n"); + } +#endif + + double time_before = before.tv_sec + before.tv_usec / 1000000.0; + double time_after = after.tv_sec + after.tv_usec / 1000000.0; + double diff = time_after - time_before; + cout << "Elapsed time: " << diff << " seconds." << endl; + + return 0; +} + +int readFile(char * fileName, vector & v) +{ + ifstream in(fileName); + if (!in.is_open()) + { + cerr << "Error opening " << fileName << endl; + return -1; + } + + for (;;) + { + int weight; + in >> weight; + if (in.eof()) + break; + v.push_back(weight); + } + + return (int) sqrt(v.size()); +} + +void convertToMatrix(const vector & v, int num_verts, int * vals) +{ + int vidx = 0; + int (*V)[num_verts][num_verts] = (int (*)[num_verts][num_verts]) vals; + for (int i = 0; i < num_verts; i++) + { + for (int j = 0; j < num_verts; j++) + { + (*V)[i][j] = v[vidx++]; + if ((*V)[i][j] == 0) + (*V)[i][j] = INT_MAX; + } + } +} diff --git a/cs677/hw5/src/floyd-sequential.cc b/cs677/hw5/src/floyd-sequential.cc new file mode 100644 index 0000000..4bc862f --- /dev/null +++ b/cs677/hw5/src/floyd-sequential.cc @@ -0,0 +1,125 @@ + +/* Josh Holtrop + * 2008-10-15 + * CS 677 + * Grand Valley State University + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include /* gettimeofday(), struct timeval */ +using namespace std; + +void usage(char * progname); +int readFile(char * fileName, vector & v); +void convertToMatrix(const vector & v, int num_verts, int * vals); + +void usage(char * progname) +{ + cout << "Usage: " << progname << " " << endl; + exit(42); +} + +int main(int argc, char * argv[]) +{ + if (argc < 1) + usage(argv[0]); + + vector v; + + int num_verts = readFile(argv[1], v); + int D[2][num_verts][num_verts]; + convertToMatrix(v, num_verts, (int *) &D[0]); + + struct timeval before, after; + gettimeofday(&before, NULL); /* Start timing */ + + /* Run Floyd's Algorithm on D */ + for (int k = 1; k <= num_verts; k++) + { + for (int i = 0; i < num_verts; i++) + { + for (int j = 0; j < num_verts; j++) + { + int distWithoutK = D[(k-1) & 1][i][j]; + int distItoK = D[(k-1) & 1][i][k-1]; + int distKtoJ = D[(k-1) & 1][k-1][j]; + int distWithK = + (distItoK == INT_MAX || distKtoJ == INT_MAX) + ? INT_MAX + : distItoK + distKtoJ; + D[k & 1][i][j] = min( + distWithoutK, + distWithK + ); + } + } + } + + gettimeofday(&after, NULL); /* Stop timing */ + +#ifdef PRINT_RESULT + cout << "Result:" << endl; + /* Print out the final matrix */ + for (int i = 0; i < num_verts; i++) + { + for (int j = 0; j < num_verts; j++) + { + if (D[num_verts & 1][i][j] == INT_MAX) + printf("-- "); + else + printf("%2d ", D[num_verts & 1][i][j]); + } + printf("\n"); + } +#endif + + double time_before = before.tv_sec + before.tv_usec / 1000000.0; + double time_after = after.tv_sec + after.tv_usec / 1000000.0; + double diff = time_after - time_before; + cout << "Elapsed time: " << diff << " seconds." << endl; + + return 0; +} + +int readFile(char * fileName, vector & v) +{ + ifstream in(fileName); + if (!in.is_open()) + { + cerr << "Error opening " << fileName << endl; + return -1; + } + + for (;;) + { + int weight; + in >> weight; + if (in.eof()) + break; + v.push_back(weight); + } + + return (int) sqrt(v.size()); +} + +void convertToMatrix(const vector & v, int num_verts, int * vals) +{ + int vidx = 0; + int (*V)[num_verts][num_verts] = (int (*)[num_verts][num_verts]) vals; + for (int i = 0; i < num_verts; i++) + { + for (int j = 0; j < num_verts; j++) + { + (*V)[i][j] = v[vidx++]; + if ((*V)[i][j] == 0) + (*V)[i][j] = INT_MAX; + } + } +} diff --git a/cs677/hw5/src/gen_adj_matrix.c b/cs677/hw5/src/gen_adj_matrix.c new file mode 100644 index 0000000..7e09739 --- /dev/null +++ b/cs677/hw5/src/gen_adj_matrix.c @@ -0,0 +1,39 @@ +// gen_adj_matrix.c +// generates random adjacency matrix of desired size and connectivity +// gw + +#include +#include + +int main(int argc, char *argv[]) +{ + int i, j; + FILE *fp; + int numVertices, avgConnectivity; + + // get parameters + if (argc != 3) { + printf ("usage: progName numVertices avgConnectivity\n"); + exit(-1); + } + else { + numVertices = atoi(argv[1]); + avgConnectivity = atoi(argv[2]); + } + + // open/create output file + if ((fp = fopen ("adjacency.dat", "w")) == NULL) { + printf ("coulnd not create file\n"); + exit(-1); + } + + // generate random graph/matrix + for (i=0; i < numVertices; i++) + for (j=0; j < numVertices; j++) + if (((rand() % numVertices) < avgConnectivity) && i!=j) + fprintf (fp, "%d ", 1 + (rand() % 20)); + else + fprintf (fp, "%d ", 0); + fclose (fp); + return 0; +} diff --git a/cs677/hw5/src/time-for-n.pl b/cs677/hw5/src/time-for-n.pl new file mode 100755 index 0000000..8adc48d --- /dev/null +++ b/cs677/hw5/src/time-for-n.pl @@ -0,0 +1,14 @@ +#!/usr/bin/perl + +use strict; +use warnings; + +for (my $n = 100; $n <= 1000; $n += 100) +{ + print("./gen_adj_matrix $n 6\n"); + system('./gen_adj_matrix', $n, 6); + print("./floyd-sequential adjacency.dat\n"); + system('./floyd-sequential', 'adjacency.dat'); + print("./floyd-parallel adjacency.dat\n"); + system('./floyd-parallel', 'adjacency.dat'); +}