updating hw.tex, added src from hw7

git-svn-id: svn://anubis/gvsu@227 45c1a28c-8058-47b2-ae61-ca45b979098e
2008-11-01 16:56:50 +00:00 · 2008-11-01 16:56:50 +00:00 · 446b8464b3
commit 446b8464b3
parent 0c6614e89e
6 changed files with 341 additions and 43 deletions
--- a/cs677/hw5/hw.tex
+++ b/cs677/hw5/hw.tex
@ -12,8 +12,8 @@
 \renewcommand{\headrulewidth}{0pt}
 \renewcommand{\footrulewidth}{0pt}
 \fancyhf{}
-\lhead{HW Chap. 7\\\ \\\ }
+\lhead{HW Chap. 5\\\ \\\ }
-\rhead{Josh Holtrop\\2008-10-15\\CS 677}
+\rhead{Josh Holtrop\\2008-11-05\\CS 677}
 \rfoot{\thepage}
 \begin{document}
@ -21,54 +21,31 @@
 \noindent
 \begin{enumerate}
 \item[1.]{
-    Break the ``parallel region'' into a function accepting a \texttt{void *}
+    The best known sequential sorting algorithms have a complexity of $O (n \log n)$.
-    parameter.
+    So, the speedup factor is given by
-    Before the ``parallel region'' create a \texttt{for} loop which loops
+    $$ s = \frac{T_s}{T_p} = \frac{n \log n}{cn} = \frac{\log n}{c} $$
    \textit{n} times (where \textit{n} is the number of threads),
    invoking \texttt{pthread\_create()} once for each thread.
    Any variables local to the function containing the ``parallel region''
    that the ``parallel region'' function needs access to
    would have to be stored as pointers in a structure whose address was
    passed as an argument to the thread function.
    Then, the thread would run the code in the ``parallel region''.
    After the region, a \texttt{for} loop would exist to loop over all
    the threads created in the first loop and execute \texttt{pthread\_join()}
    for each one.
 }
 \vskip 2em
 \item[2.]{
-    Each thread could store its result into an array indexed by its ID.
+    The total processing time when the program is run on $p$ processors
-    Then, when computation is complete, a regular \texttt{for} loop
+        will be given by the initialization phase plus the compute phase
-    within an OpenMP parallel region could iterate
+        divided by $p$ processors.
-    $\lceil \log_2 n \rceil$ times.
+    So, the speedup is given by
-    In the first iteration, threads where $ID\mod 2 = 0$ would perform
+    $$ s = \frac{T_s}{T_p} = \frac{n + n^3}{n + \frac{n^3}{p}} $$
    the reduction operation on their array value and the array value
    at index $ID + 1$ while the rest of the threads are idle.
    In the second iteration, threads where $ID\mod 4 = 0$ would perform
    the reduction operation on their array value and the array value
    at index $ID + 2$ while the rest of the threads are idle.
    This process would repeat (doubling the mod value and offset index
    each time) until the reduction operation has been
    performed to produce the final result value at index 0 of the
    array.
 }
 \vskip 2em
 \item[3.]{
-    My OpenMP solution to Floyd's algorithm was implemented by
+    Using Amdahl's law, the maximum speedup is $1/f$, where $f$ is the
-    using a \texttt{\#pragma omp parallel for} on the second \texttt{for}
+        serial fraction of execution time.
-    loop of the algorithm.
+    So, the maximum fraction of execution time a program can spend on
-    Thus, for each $k$ value, the rows are broken up for different
+        serial code if the parallel version must achieve a speedup
-    threads to process.
+        factor of 10 is 10\%.
-    The same thread computes an entire row of the matrix.
+}
-    The run times nicely grow exponentially as $n$ grows linearly.
+\vskip 1em
-    On eos24, with $n >= 400$, the speedup was $\approx 3.6$.
+\item[4.]{
-
+    Using Gustafson's law, the scaled speedup factor is given by
-    As the number of threads increased, the run time decreased
+    $$ S_G = p + (1 - p) T_s = 8 + (1 - 8) \frac{1}{24} \approx 7.708 $$
    exponentially until $t > 4$, where more threads did not gain
    anything since there were only 4 processing cores.
 }
 \end{enumerate}
--- a/cs677/hw5/src/Makefile
+++ b/cs677/hw5/src/Makefile
@ -0,0 +1,17 @@
 # Programs built by this Makefile; each one comes from a single source file
 # with the same base name.
 TARGETS := gen_adj_matrix
 TARGETS += floyd-sequential
 TARGETS += floyd-parallel
 CXXFLAGS := -fopenmp
 #CXXFLAGS += -DPRINT_RESULT
 OBJS := $(addsuffix .o,$(TARGETS))
 # all and clean name actions, not files.
 .PHONY: all clean
 all: $(TARGETS)
 # Static pattern rule: every program is linked from its own object file only,
 # so changing one source no longer relinks the other two programs.
 $(TARGETS): %: %.o
 	$(CXX) -o $@ $< $(CXXFLAGS)
 clean:
 	-rm -f *~ *.o $(TARGETS)
--- a/cs677/hw5/src/floyd-parallel.cc
+++ b/cs677/hw5/src/floyd-parallel.cc
@ -0,0 +1,126 @@
 /* Josh Holtrop
 * 2008-10-15
 * CS 677
 * Grand Valley State University
 */
 #include <omp.h>
 #include <math.h>
 #include <stdlib.h>
 #include <limits.h>
 #include <stdio.h>
 #include <iostream>
 #include <fstream>
 #include <vector>
 #include <sys/time.h>   /* gettimeofday(), struct timeval */
 using namespace std;
 void usage(char * progname);
 int readFile(char * fileName, vector<int> & v);
 void convertToMatrix(const vector<int> & v, int num_verts, int * vals);
 /*
  * Print the command-line synopsis for this program to standard output
  * and terminate the process with exit status 42.
  *
  * progname: program name shown in the synopsis (callers pass argv[0]).
  */
 void usage(char * progname)
 {
    cout << "Usage: ";
    cout << progname;
    cout << " <adjacency-file>";
    cout << endl;
    exit(42);
 }
 /*
  * Entry point: load an adjacency matrix from the file named by argv[1],
  * run Floyd's all-pairs shortest-path algorithm on it with the row loop
  * parallelised by OpenMP, and report the elapsed wall-clock time.
  *
  * Returns 0 on success and 1 if the input file could not be opened.
  * When no file argument is given, usage() prints a synopsis and exits.
  */
 int main(int argc, char * argv[])
 {
    /* argv[1] only exists when argc >= 2 */
    if (argc < 2)
        usage(argv[0]);
    vector<int> v;
    int num_verts = readFile(argv[1], v);
    if (num_verts < 0)
        return 1;           /* readFile() has already reported the error */
    /*
     * Two num_verts x num_verts matrices, stored row-major on the heap so
     * that large inputs cannot overflow the stack.  Iteration k reads the
     * matrix selected by (k-1) & 1 and writes the one selected by k & 1.
     */
    const size_t n = (size_t) num_verts;
    const size_t cells = n * n;
    vector<int> storage(2 * cells);
    int * base = storage.empty() ? NULL : &storage[0];
    int * D[2] = { base, base + cells };
    convertToMatrix(v, num_verts, D[0]);
    struct timeval before, after;
    gettimeofday(&before, NULL);        /* Start timing */
    /* Run Floyd's Algorithm on D */
    for (int k = 1; k <= num_verts; k++)
    {
        const int * prev = D[(k-1) & 1];
        int * next = D[k & 1];
        const int * rowK = prev + (size_t) (k-1) * n;
        /* each thread computes whole rows of the output matrix */
 #pragma omp parallel for
        for (int i = 0; i < num_verts; i++)
        {
            const int * rowI = prev + (size_t) i * n;
            int * outI = next + (size_t) i * n;
            int distItoK = rowI[k-1];
            for (int j = 0; j < num_verts; j++)
            {
                int distWithoutK = rowI[j];
                int distKtoJ = rowK[j];
                /* INT_MAX marks "no path"; never add to it */
                int distWithK =
                    (distItoK == INT_MAX || distKtoJ == INT_MAX)
                        ? INT_MAX
                        : distItoK + distKtoJ;
                outI[j] = (distWithK < distWithoutK)
                    ? distWithK
                    : distWithoutK;
            }
        }
    }
    gettimeofday(&after, NULL);         /* Stop timing */
 #ifdef PRINT_RESULT
    cout << "Result:" << endl;
    /* Print out the final matrix (the one written by the last iteration) */
    const int * result = D[num_verts & 1];
    for (int i = 0; i < num_verts; i++)
    {
        for (int j = 0; j < num_verts; j++)
        {
            int dist = result[(size_t) i * n + j];
            if (dist == INT_MAX)
                printf("-- ");
            else
                printf("%2d ", dist);
        }
        printf("\n");
    }
 #endif
    double time_before = before.tv_sec + before.tv_usec / 1000000.0;
    double time_after = after.tv_sec + after.tv_usec / 1000000.0;
    double diff = time_after - time_before;
    cout << "Elapsed time: " << diff << " seconds." << endl;
    return 0;
 }
 /*
  * Read whitespace-separated integers from fileName and append them to v.
  *
  * Reading stops at end of file or at the first token that is not an
  * integer.  Every successfully parsed value is kept, including a final
  * value that is not followed by whitespace.
  *
  * Returns the integer part of the square root of v.size() (the matrix
  * dimension when the file holds a square matrix), or -1 after printing
  * a message to stderr if the file cannot be opened.
  */
 int readFile(char * fileName, vector<int> & v)
 {
    ifstream in(fileName);
    if (!in.is_open())
    {
        cerr << "Error opening " << fileName << endl;
        return -1;
    }
    /* The extraction itself is the loop condition: it is false on EOF
     * and on malformed input alike, so the loop always terminates. */
    int weight;
    while (in >> weight)
        v.push_back(weight);
    return (int) sqrt((double) v.size());
 }
 /*
  * Copy the first num_verts * num_verts entries of v, in order, into the
  * row-major matrix that vals points to.  An entry equal to 0 is stored
  * as INT_MAX instead (presumably marking "no edge" -- the rest of the
  * program treats INT_MAX as unreachable).
  *
  * v:         source values, read front to back
  * num_verts: number of rows and columns of the matrix
  * vals:      destination with room for num_verts * num_verts ints
  */
 void convertToMatrix(const vector<int> & v, int num_verts, int * vals)
 {
    int next = 0;
    int * cell = vals;
    for (int row = 0; row < num_verts; ++row)
    {
        for (int col = 0; col < num_verts; ++col, ++cell)
        {
            const int weight = v[next++];
            *cell = (weight == 0) ? INT_MAX : weight;
        }
    }
 }
--- a/cs677/hw5/src/floyd-sequential.cc
+++ b/cs677/hw5/src/floyd-sequential.cc
@ -0,0 +1,125 @@
 /* Josh Holtrop
 * 2008-10-15
 * CS 677
 * Grand Valley State University
 */
 #include <omp.h>
 #include <math.h>
 #include <stdlib.h>
 #include <limits.h>
 #include <stdio.h>
 #include <iostream>
 #include <fstream>
 #include <vector>
 #include <sys/time.h>   /* gettimeofday(), struct timeval */
 using namespace std;
 void usage(char * progname);
 int readFile(char * fileName, vector<int> & v);
 void convertToMatrix(const vector<int> & v, int num_verts, int * vals);
 /*
  * Write the command-line synopsis to standard output, then end the
  * process with exit status 42.
  *
  * progname: program name to show in the synopsis (callers pass argv[0]).
  */
 void usage(char * progname)
 {
    cout << "Usage: ";
    cout << progname;
    cout << " <adjacency-file>";
    cout << endl;
    exit(42);
 }
 /*
  * Entry point: load an adjacency matrix from the file named by argv[1],
  * run Floyd's all-pairs shortest-path algorithm on it sequentially, and
  * report the elapsed wall-clock time.
  *
  * Returns 0 on success and 1 if the input file could not be opened.
  * When no file argument is given, usage() prints a synopsis and exits.
  */
 int main(int argc, char * argv[])
 {
    /* argv[1] only exists when argc >= 2 */
    if (argc < 2)
        usage(argv[0]);
    vector<int> v;
    int num_verts = readFile(argv[1], v);
    if (num_verts < 0)
        return 1;           /* readFile() has already reported the error */
    /*
     * Two num_verts x num_verts matrices, stored row-major on the heap so
     * that large inputs cannot overflow the stack.  Iteration k reads the
     * matrix selected by (k-1) & 1 and writes the one selected by k & 1.
     */
    const size_t n = (size_t) num_verts;
    const size_t cells = n * n;
    vector<int> storage(2 * cells);
    int * base = storage.empty() ? NULL : &storage[0];
    int * D[2] = { base, base + cells };
    convertToMatrix(v, num_verts, D[0]);
    struct timeval before, after;
    gettimeofday(&before, NULL);        /* Start timing */
    /* Run Floyd's Algorithm on D */
    for (int k = 1; k <= num_verts; k++)
    {
        const int * prev = D[(k-1) & 1];
        int * next = D[k & 1];
        const int * rowK = prev + (size_t) (k-1) * n;
        for (int i = 0; i < num_verts; i++)
        {
            const int * rowI = prev + (size_t) i * n;
            int * outI = next + (size_t) i * n;
            int distItoK = rowI[k-1];
            for (int j = 0; j < num_verts; j++)
            {
                int distWithoutK = rowI[j];
                int distKtoJ = rowK[j];
                /* INT_MAX marks "no path"; never add to it */
                int distWithK =
                    (distItoK == INT_MAX || distKtoJ == INT_MAX)
                        ? INT_MAX
                        : distItoK + distKtoJ;
                outI[j] = (distWithK < distWithoutK)
                    ? distWithK
                    : distWithoutK;
            }
        }
    }
    gettimeofday(&after, NULL);         /* Stop timing */
 #ifdef PRINT_RESULT
    cout << "Result:" << endl;
    /* Print out the final matrix (the one written by the last iteration) */
    const int * result = D[num_verts & 1];
    for (int i = 0; i < num_verts; i++)
    {
        for (int j = 0; j < num_verts; j++)
        {
            int dist = result[(size_t) i * n + j];
            if (dist == INT_MAX)
                printf("-- ");
            else
                printf("%2d ", dist);
        }
        printf("\n");
    }
 #endif
    double time_before = before.tv_sec + before.tv_usec / 1000000.0;
    double time_after = after.tv_sec + after.tv_usec / 1000000.0;
    double diff = time_after - time_before;
    cout << "Elapsed time: " << diff << " seconds." << endl;
    return 0;
 }
 /*
  * Read whitespace-separated integers from fileName and append them to v.
  *
  * Reading stops at end of file or at the first token that is not an
  * integer.  Every successfully parsed value is kept, including a final
  * value that is not followed by whitespace.
  *
  * Returns the integer part of the square root of v.size() (the matrix
  * dimension when the file holds a square matrix), or -1 after printing
  * a message to stderr if the file cannot be opened.
  */
 int readFile(char * fileName, vector<int> & v)
 {
    ifstream in(fileName);
    if (!in.is_open())
    {
        cerr << "Error opening " << fileName << endl;
        return -1;
    }
    /* The extraction itself is the loop condition: it is false on EOF
     * and on malformed input alike, so the loop always terminates. */
    int weight;
    while (in >> weight)
        v.push_back(weight);
    return (int) sqrt((double) v.size());
 }
 /*
  * Copy the first num_verts * num_verts entries of v, in order, into the
  * row-major matrix that vals points to.  An entry equal to 0 is stored
  * as INT_MAX instead (presumably marking "no edge" -- the rest of the
  * program treats INT_MAX as unreachable).
  *
  * v:         source values, read front to back
  * num_verts: number of rows and columns of the matrix
  * vals:      destination with room for num_verts * num_verts ints
  */
 void convertToMatrix(const vector<int> & v, int num_verts, int * vals)
 {
    int next = 0;
    int * cell = vals;
    for (int row = 0; row < num_verts; ++row)
    {
        for (int col = 0; col < num_verts; ++col, ++cell)
        {
            const int weight = v[next++];
            *cell = (weight == 0) ? INT_MAX : weight;
        }
    }
 }
--- a/cs677/hw5/src/gen_adj_matrix.c
+++ b/cs677/hw5/src/gen_adj_matrix.c
@ -0,0 +1,39 @@
 // gen_adj_matrix.c
 // generates random adjacency matrix of desired size and connectivity
 // gw
 #include <stdio.h>
 #include <stdlib.h>
 // Parse text as a base-10 integer that fits in an int.
 // Returns 1 and stores the value in *out on success; returns 0 if text is
 // empty, has trailing characters, or does not survive the round trip to int.
 static int parse_int_arg(const char *text, int *out)
 {
 	char *end;
 	long value = strtol(text, &end, 10);

 	if (end == text || *end != '\0' || (long)(int)value != value)
 		return 0;
 	*out = (int)value;
 	return 1;
 }

 // Writes a random weighted adjacency matrix to "adjacency.dat".
 //
 // argv[1] (numVertices) is the matrix dimension and must be >= 1.
 // argv[2] (avgConnectivity) controls density: an off-diagonal cell gets a
 // weight in 1..20 when (rand() % numVertices) < avgConnectivity, otherwise 0.
 // Diagonal cells are always 0.  rand() is never seeded, so repeated runs
 // with the same arguments use the same rand() sequence.
 //
 // Exits with status -1 on bad arguments or on any file error.
 int main(int argc, char *argv[])
 {
 	int i, j;
 	FILE *fp;
 	int numVertices, avgConnectivity;
 	// get parameters
 	if (argc != 3) {
 		printf ("usage: progName numVertices avgConnectivity\n");
 		exit(-1);
 	}
 	// numVertices is used as a modulus below, so zero must be rejected
 	if (!parse_int_arg(argv[1], &numVertices) || numVertices < 1) {
 		printf ("numVertices must be a positive integer\n");
 		exit(-1);
 	}
 	if (!parse_int_arg(argv[2], &avgConnectivity)) {
 		printf ("avgConnectivity must be an integer\n");
 		exit(-1);
 	}
 	//  open/create output file
 	if ((fp = fopen ("adjacency.dat", "w")) == NULL) {
 		printf ("could not create file\n");
 		exit(-1);
 	}
 	// generate random graph/matrix
 	for (i=0; i < numVertices; i++) {
 		for (j=0; j < numVertices; j++) {
 			if (((rand() % numVertices) < avgConnectivity) && i!=j)
 				fprintf (fp, "%d ",  1 + (rand() % 20));
 			else
 				fprintf (fp, "%d ", 0);
 		}
 	}
 	// buffered write errors may only surface at close time; check both
 	if (ferror (fp) | fclose (fp)) {
 		printf ("error writing adjacency.dat\n");
 		exit(-1);
 	}
 	return 0;
 }
--- a/cs677/hw5/src/time-for-n.pl
+++ b/cs677/hw5/src/time-for-n.pl
@ -0,0 +1,14 @@
 #!/usr/bin/perl
 use strict;
 use warnings;
 # Flush our own output immediately so each echoed command line appears
 # before the output of the program it announces, even when piped to a file.
 $| = 1;

 # Echo a command, then run it in list form (no shell involved).
 # Returns true when the command ran and exited with status 0.
 sub run_step
 {
    my @cmd = @_;
    print("@cmd\n");
    return system(@cmd) == 0;
 }

 # Time both Floyd implementations on freshly generated graphs of
 # 100, 200, ..., 1000 vertices (connectivity parameter 6).
 for (my $n = 100; $n <= 1000; $n += 100)
 {
    # Without a fresh adjacency.dat the timings below would silently
    # measure stale data, so a generator failure stops the sweep.
    run_step('./gen_adj_matrix', $n, 6)
        or die("gen_adj_matrix failed for n=$n (wait status $?)\n");
    run_step('./floyd-sequential', 'adjacency.dat')
        or warn("floyd-sequential failed for n=$n (wait status $?)\n");
    run_step('./floyd-parallel', 'adjacency.dat')
        or warn("floyd-parallel failed for n=$n (wait status $?)\n");
 }