updating hw.tex, added src from hw7

git-svn-id: svn://anubis/gvsu@227 45c1a28c-8058-47b2-ae61-ca45b979098e
2008-11-01 16:56:50 +00:00 · 2008-11-01 16:56:50 +00:00 · 446b8464b3
commit 446b8464b3
parent 0c6614e89e
6 changed files with 341 additions and 43 deletions
--- a/cs677/hw5/hw.tex
+++ b/cs677/hw5/hw.tex
@ -12,8 +12,8 @@
 \renewcommand{\headrulewidth}{0pt}
 \renewcommand{\footrulewidth}{0pt}
 \fancyhf{}
-\lhead{HW Chap. 7\\\ \\\ }
-\rhead{Josh Holtrop\\2008-10-15\\CS 677}
+\lhead{HW Chap. 5\\\ \\\ }
+\rhead{Josh Holtrop\\2008-11-05\\CS 677}
 \rfoot{\thepage}

 \begin{document}
@ -21,54 +21,31 @@
 \noindent
 \begin{enumerate}
 \item[1.]{
-    Break the ``parallel region'' into a function accepting a \texttt{void *}
-    parameter.
-    Before the ``parallel region'' create a \texttt{for} loop which loops
-    \textit{n} times (where \textit{n} is the number of threads),
-    invoking \texttt{pthread\_create()} once for each thread.
-    Any variables local to the function containing the ``parallel region''
-    that the ``parallel region'' function needs access to
-    would have to be stored as pointers in a structure whose address was
-    passed as an argument to the thread function.
-    Then, the thread would run the code in the ``parallel region''.
-    After the region, a \texttt{for} loop would exist to loop over all
-    the threads created in the first loop and execute \texttt{pthread\_join()}
-    for each one.
+    The best known sequential sorting algorithms have a complexity of $O (n \log n)$.
+    So, the speedup factor is given by
+    $$ s = \frac{T_s}{T_p} = \frac{n \log n}{cn} = \frac{\log n}{c} $$
 }

-\vskip 2em
 \item[2.]{
-    Each thread could store its result into an array indexed by its ID.
-    Then, when computation is complete, a regular \texttt{for} loop
-    within an OpenMP parallel region could iterate
-    $\lceil \log_2 n \rceil$ times.
-    In the first iteration, threads where $ID\mod 2 = 0$ would perform
-    the reduction operation on their array value and the array value
-    at index $ID + 1$ while the rest of the threads are idle.
-    In the second iteration, threads where $ID\mod 4 = 0$ would perform
-    the reduction operation on their array value and the array value
-    at index $ID + 2$ while the rest of the threads are idle.
-    This process would repeat (doubling the mod value and offset index
-    each time) until the reduction operation has been
-    performed to produce the final result value at index 0 of the
-    array.
+    The total processing time when the program is run on $p$ processors
+        will be given by the initialization phase plus the compute phase
+        divided by $p$ processors.
+    So, the speedup is given by
+    $$ s = \frac{T_s}{T_p} = \frac{n + n^3}{n + \frac{n^3}{p}} $$
 }

-\vskip 2em
 \item[3.]{
-    My OpenMP solution to Floyd's algorithm was implemented by
-    using a \texttt{\#pragma omp parallel for} on the second \texttt{for}
-    loop of the algorithm.
-    Thus, for each $k$ value, the rows are broken up for different
-    threads to process.
-    The same thread computes an entire row of the matrix.
+    Using Amdahl's law, the maximum speedup (approached as the number of
+        processors grows without bound) is $1/f$, where $f$ is the
+        serial fraction of execution time.
+    Requiring $1/f \ge 10$ gives $f \le 0.1$, so the maximum fraction of
+        execution time a program can spend on serial code if the parallel
+        version must achieve a speedup factor of 10 is 10\%.
+}

-    The run times nicely grow exponentially as $n$ grows linearly.
-    On eos24, with $n >= 400$, the speedup was $\approx 3.6$.
-
-    As the number of threads increased, the run time decreased
-    exponentially until $t > 4$, where more threads did not gain
-    anything since there were only 4 processing cores.
+\vskip 1em
+\item[4.]{
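+    Using Gustafson's law, the scaled speedup factor is given by
+    $$ S_G = p + (1 - p) s = 8 + (1 - 8) \frac{1}{24} \approx 7.708 $$
+    where $p$ is the number of processors and $s$ is the serial fraction
+        of the execution time.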
 }

 \end{enumerate}
--- a/cs677/hw5/src/Makefile
+++ b/cs677/hw5/src/Makefile
@ -0,0 +1,17 @@
+
+# Programs built by this Makefile; each one is linked from the single
+# object file that shares its name.
+TARGETS := gen_adj_matrix
+TARGETS += floyd-sequential
+TARGETS += floyd-parallel
+
+CXXFLAGS := -fopenmp
+#CXXFLAGS += -DPRINT_RESULT
+
+OBJS := $(addsuffix .o,$(TARGETS))
+
+# "all" and "clean" name actions, not files.
+.PHONY: all clean
+
+all: $(TARGETS)
+
+# Static pattern rule: every target depends only on its own object file,
+# so editing one source no longer relinks the other programs.
+$(TARGETS): %: %.o
+	$(CXX) -o $@ $< $(CXXFLAGS)
+
+clean:
+	-rm -f *~ *.o $(TARGETS)
--- a/cs677/hw5/src/floyd-parallel.cc
+++ b/cs677/hw5/src/floyd-parallel.cc
@ -0,0 +1,126 @@
+
+/* Josh Holtrop
+ * 2008-10-15
+ * CS 677
+ * Grand Valley State University
+ */
+
+#include <omp.h>
+#include <math.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <stdio.h>
+#include <iostream>
+#include <fstream>
+#include <vector>
+#include <sys/time.h>   /* gettimeofday(), struct timeval */
+using namespace std;
+
+void usage(char * progname);
+int readFile(char * fileName, vector<int> & v);
+void convertToMatrix(const vector<int> & v, int num_verts, int * vals);
+
+/* Print the command-line synopsis to standard output, then terminate
+ * the process with exit status 42. */
+void usage(char * progname)
+{
+    cout << "Usage: " << progname;
+    cout << " <adjacency-file>" << endl;
+    exit(42);
+}
+
+/*
+ * Reads the adjacency matrix named by argv[1], runs Floyd's all-pairs
+ * shortest-path algorithm on it with the row loop parallelised by OpenMP,
+ * and prints the elapsed wall-clock time of the computation.
+ *
+ * Returns 0 on success and 1 when no matrix could be read; a missing
+ * argument ends the program through usage().
+ */
+int main(int argc, char * argv[])
+{
+    /* argv[1] is read below, so one argument after the program name is
+     * required. */
+    if (argc < 2)
+        usage(argv[0]);
+
+    vector<int> v;
+
+    int num_verts = readFile(argv[1], v);
+    if (num_verts < 0)
+        return 1;               /* readFile() has already reported the error */
+    if (num_verts == 0)
+    {
+        cerr << "No adjacency matrix found in " << argv[1] << endl;
+        return 1;
+    }
+
+    /* Two num_verts x num_verts matrices stored row-major on the heap
+     * (a stack array of this size approaches typical stack limits for
+     * large inputs).  D[(k-1) & 1] is read and D[k & 1] is written in
+     * step k, so the two buffers alternate roles. */
+    const size_t n = num_verts;
+    const size_t cells = n * n;
+    vector<int> storage(2 * cells);
+    int * D[2] = { &storage[0], &storage[0] + cells };
+    convertToMatrix(v, num_verts, D[0]);
+
+    struct timeval before, after;
+    gettimeofday(&before, NULL);        /* Start timing */
+
+    /* Run Floyd's Algorithm on D */
+    for (int k = 1; k <= num_verts; k++)
+    {
+        const int * prev = D[(k-1) & 1];
+        int * next = D[k & 1];
+        const int * rowK = prev + (size_t) (k-1) * n;
+#pragma omp parallel for
+        for (int i = 0; i < num_verts; i++)
+        {
+            const int * rowI = prev + (size_t) i * n;
+            int * rowOut = next + (size_t) i * n;
+            int distItoK = rowI[k-1];
+            for (int j = 0; j < num_verts; j++)
+            {
+                int distKtoJ = rowK[j];
+                /* INT_MAX marks "no path"; never add to it. */
+                int distWithK =
+                    (distItoK == INT_MAX || distKtoJ == INT_MAX)
+                        ? INT_MAX
+                        : distItoK + distKtoJ;
+                rowOut[j] = min(rowI[j], distWithK);
+            }
+        }
+    }
+
+    gettimeofday(&after, NULL);         /* Stop timing */
+
+#ifdef PRINT_RESULT
+    cout << "Result:" << endl;
+    /* Print out the final matrix; the last step wrote D[num_verts & 1] */
+    const int * result = D[num_verts & 1];
+    for (int i = 0; i < num_verts; i++)
+    {
+        for (int j = 0; j < num_verts; j++)
+        {
+            int dist = result[(size_t) i * n + j];
+            if (dist == INT_MAX)
+                printf("-- ");
+            else
+                printf("%2d ", dist);
+        }
+        printf("\n");
+    }
+#endif
+
+    double time_before = before.tv_sec + before.tv_usec / 1000000.0;
+    double time_after = after.tv_sec + after.tv_usec / 1000000.0;
+    double diff = time_after - time_before;
+    cout << "Elapsed time: " << diff << " seconds." << endl;
+
+    return 0;
+}
+
+/*
+ * Appends every whitespace-separated integer found in fileName to v.
+ *
+ * Returns the integer part of the square root of v.size() (the side
+ * length of the square matrix the values form), or -1 after printing a
+ * message to cerr when the file cannot be opened.
+ */
+int readFile(char * fileName, vector<int> & v)
+{
+    ifstream in(fileName);
+    if (!in.is_open())
+    {
+        cerr << "Error opening " << fileName << endl;
+        return -1;
+    }
+
+    /* Test the extraction itself rather than eof(): this keeps a final
+     * value that is not followed by whitespace and stops at the first
+     * token that is not an integer instead of looping forever. */
+    int weight;
+    while (in >> weight)
+        v.push_back(weight);
+
+    /* The explicit double avoids an ambiguous sqrt() overload. */
+    return (int) sqrt((double) v.size());
+}
+
+/*
+ * Copies the first num_verts * num_verts entries of v, in order, into
+ * vals, which is treated as a row-major num_verts x num_verts matrix.
+ * A weight of 0 is stored as INT_MAX.
+ */
+void convertToMatrix(const vector<int> & v, int num_verts, int * vals)
+{
+    int cell = 0;   /* rows are contiguous, so one running index serves both */
+    for (int row = 0; row < num_verts; ++row)
+    {
+        for (int col = 0; col < num_verts; ++col)
+        {
+            const int weight = v[cell];
+            vals[cell] = (weight == 0) ? INT_MAX : weight;
+            ++cell;
+        }
+    }
+}
--- a/cs677/hw5/src/floyd-sequential.cc
+++ b/cs677/hw5/src/floyd-sequential.cc
@ -0,0 +1,125 @@
+
+/* Josh Holtrop
+ * 2008-10-15
+ * CS 677
+ * Grand Valley State University
+ */
+
+#include <omp.h>
+#include <math.h>
+#include <stdlib.h>
+#include <limits.h>
+#include <stdio.h>
+#include <iostream>
+#include <fstream>
+#include <vector>
+#include <sys/time.h>   /* gettimeofday(), struct timeval */
+using namespace std;
+
+void usage(char * progname);
+int readFile(char * fileName, vector<int> & v);
+void convertToMatrix(const vector<int> & v, int num_verts, int * vals);
+
+/* Print the command-line synopsis to standard output, then terminate
+ * the process with exit status 42. */
+void usage(char * progname)
+{
+    cout << "Usage: " << progname;
+    cout << " <adjacency-file>" << endl;
+    exit(42);
+}
+
+/*
+ * Reads the adjacency matrix named by argv[1], runs Floyd's all-pairs
+ * shortest-path algorithm on it sequentially, and prints the elapsed
+ * wall-clock time of the computation.
+ *
+ * Returns 0 on success and 1 when no matrix could be read; a missing
+ * argument ends the program through usage().
+ */
+int main(int argc, char * argv[])
+{
+    /* argv[1] is read below, so one argument after the program name is
+     * required. */
+    if (argc < 2)
+        usage(argv[0]);
+
+    vector<int> v;
+
+    int num_verts = readFile(argv[1], v);
+    if (num_verts < 0)
+        return 1;               /* readFile() has already reported the error */
+    if (num_verts == 0)
+    {
+        cerr << "No adjacency matrix found in " << argv[1] << endl;
+        return 1;
+    }
+
+    /* Two num_verts x num_verts matrices stored row-major on the heap
+     * (a stack array of this size approaches typical stack limits for
+     * large inputs).  D[(k-1) & 1] is read and D[k & 1] is written in
+     * step k, so the two buffers alternate roles. */
+    const size_t n = num_verts;
+    const size_t cells = n * n;
+    vector<int> storage(2 * cells);
+    int * D[2] = { &storage[0], &storage[0] + cells };
+    convertToMatrix(v, num_verts, D[0]);
+
+    struct timeval before, after;
+    gettimeofday(&before, NULL);        /* Start timing */
+
+    /* Run Floyd's Algorithm on D */
+    for (int k = 1; k <= num_verts; k++)
+    {
+        const int * prev = D[(k-1) & 1];
+        int * next = D[k & 1];
+        const int * rowK = prev + (size_t) (k-1) * n;
+        for (int i = 0; i < num_verts; i++)
+        {
+            const int * rowI = prev + (size_t) i * n;
+            int * rowOut = next + (size_t) i * n;
+            int distItoK = rowI[k-1];
+            for (int j = 0; j < num_verts; j++)
+            {
+                int distKtoJ = rowK[j];
+                /* INT_MAX marks "no path"; never add to it. */
+                int distWithK =
+                    (distItoK == INT_MAX || distKtoJ == INT_MAX)
+                        ? INT_MAX
+                        : distItoK + distKtoJ;
+                rowOut[j] = min(rowI[j], distWithK);
+            }
+        }
+    }
+
+    gettimeofday(&after, NULL);         /* Stop timing */
+
+#ifdef PRINT_RESULT
+    cout << "Result:" << endl;
+    /* Print out the final matrix; the last step wrote D[num_verts & 1] */
+    const int * result = D[num_verts & 1];
+    for (int i = 0; i < num_verts; i++)
+    {
+        for (int j = 0; j < num_verts; j++)
+        {
+            int dist = result[(size_t) i * n + j];
+            if (dist == INT_MAX)
+                printf("-- ");
+            else
+                printf("%2d ", dist);
+        }
+        printf("\n");
+    }
+#endif
+
+    double time_before = before.tv_sec + before.tv_usec / 1000000.0;
+    double time_after = after.tv_sec + after.tv_usec / 1000000.0;
+    double diff = time_after - time_before;
+    cout << "Elapsed time: " << diff << " seconds." << endl;
+
+    return 0;
+}
+
+/*
+ * Appends every whitespace-separated integer found in fileName to v.
+ *
+ * Returns the integer part of the square root of v.size() (the side
+ * length of the square matrix the values form), or -1 after printing a
+ * message to cerr when the file cannot be opened.
+ */
+int readFile(char * fileName, vector<int> & v)
+{
+    ifstream in(fileName);
+    if (!in.is_open())
+    {
+        cerr << "Error opening " << fileName << endl;
+        return -1;
+    }
+
+    /* Test the extraction itself rather than eof(): this keeps a final
+     * value that is not followed by whitespace and stops at the first
+     * token that is not an integer instead of looping forever. */
+    int weight;
+    while (in >> weight)
+        v.push_back(weight);
+
+    /* The explicit double avoids an ambiguous sqrt() overload. */
+    return (int) sqrt((double) v.size());
+}
+
+/*
+ * Copies the first num_verts * num_verts entries of v, in order, into
+ * vals, which is treated as a row-major num_verts x num_verts matrix.
+ * A weight of 0 is stored as INT_MAX.
+ */
+void convertToMatrix(const vector<int> & v, int num_verts, int * vals)
+{
+    int cell = 0;   /* rows are contiguous, so one running index serves both */
+    for (int row = 0; row < num_verts; ++row)
+    {
+        for (int col = 0; col < num_verts; ++col)
+        {
+            const int weight = v[cell];
+            vals[cell] = (weight == 0) ? INT_MAX : weight;
+            ++cell;
+        }
+    }
+}
--- a/cs677/hw5/src/gen_adj_matrix.c
+++ b/cs677/hw5/src/gen_adj_matrix.c
@ -0,0 +1,39 @@
+// gen_adj_matrix.c
+// generates random adjacency matrix of desired size and connectivity
+// gw
+
+#include <stdio.h>
+#include <stdlib.h>
+
+// Parses text as a base-10 integer that is at least minimum and fits in
+// an int.  Returns 1 and stores the value in *out on success, 0 otherwise.
+static int parse_arg(const char *text, long minimum, int *out)
+{
+	char *end;
+	long value = strtol(text, &end, 10);
+
+	// reject empty input and trailing garbage such as "12abc"
+	if (end == text || *end != '\0')
+		return 0;
+	// the round trip through int rejects values an int cannot hold
+	if (value < minimum || (long)(int)value != value)
+		return 0;
+	*out = (int)value;
+	return 1;
+}
+
+// Writes a random numVertices x numVertices adjacency matrix to
+// "adjacency.dat": each off-diagonal entry is a weight in 1..20 with
+// probability of roughly avgConnectivity / numVertices, and 0 otherwise.
+// Returns 0 on success; exits with status -1 on bad arguments or when
+// the file cannot be created or written.
+int main(int argc, char *argv[])
+{
+	int i, j;
+	FILE *fp;
+	int numVertices, avgConnectivity;
+
+	// get parameters
+	if (argc != 3) {
+		fprintf (stderr, "usage: progName numVertices avgConnectivity\n");
+		exit(-1);
+	}
+	if (!parse_arg(argv[1], 1, &numVertices) ||
+	    !parse_arg(argv[2], 0, &avgConnectivity)) {
+		fprintf (stderr, "numVertices must be a positive integer and "
+			"avgConnectivity a non-negative integer\n");
+		exit(-1);
+	}
+
+	//  open/create output file
+	if ((fp = fopen ("adjacency.dat", "w")) == NULL) {
+		fprintf (stderr, "could not create file\n");
+		exit(-1);
+	}
+
+	// generate random graph/matrix
+	// (rand() is called in the same order as before, so the unseeded
+	// sequence still produces the same graph for the same arguments)
+	for (i=0; i < numVertices; i++) {
+		for (j=0; j < numVertices; j++) {
+			if (((rand() % numVertices) < avgConnectivity) && i!=j)
+				fprintf (fp, "%d ",  1 + (rand() % 20));
+			else
+				fprintf (fp, "%d ", 0);
+		}
+	}
+
+	// fclose flushes the buffered output, so a failure here means the
+	// file is incomplete
+	if (fclose (fp) != 0) {
+		fprintf (stderr, "could not write file\n");
+		exit(-1);
+	}
+	return 0;
+}
--- a/cs677/hw5/src/time-for-n.pl
+++ b/cs677/hw5/src/time-for-n.pl
@ -0,0 +1,14 @@
+#!/usr/bin/perl
+
+use strict;
+use warnings;
+
+# Unbuffer STDOUT so each echoed command line appears before the output
+# of the program it announces, even when output is redirected.
+$| = 1;
+
+# Echoes a command, runs it without a shell (list form of system), and
+# aborts the script if it fails, so later steps never time a stale
+# adjacency.dat.
+sub run_step
+{
+    my @cmd = @_;
+    print("@cmd\n");
+    system(@cmd) == 0
+        or die("'@cmd' failed (wait status $?)\n");
+}
+
+# For n = 100, 200, ..., 1000: generate a graph with n vertices and time
+# both implementations of Floyd's algorithm on it.
+for (my $n = 100; $n <= 1000; $n += 100)
+{
+    run_step('./gen_adj_matrix', $n, 6);
+    run_step('./floyd-sequential', 'adjacency.dat');
+    run_step('./floyd-parallel', 'adjacency.dat');
+}