updating hw.tex, added src from hw7
git-svn-id: svn://anubis/gvsu@227 45c1a28c-8058-47b2-ae61-ca45b979098e
This commit is contained in:
parent
0c6614e89e
commit
446b8464b3
@ -12,8 +12,8 @@
|
||||
\renewcommand{\headrulewidth}{0pt}
|
||||
\renewcommand{\footrulewidth}{0pt}
|
||||
\fancyhf{}
|
||||
\lhead{HW Chap. 7\\\ \\\ }
|
||||
\rhead{Josh Holtrop\\2008-10-15\\CS 677}
|
||||
\lhead{HW Chap. 5\\\ \\\ }
|
||||
\rhead{Josh Holtrop\\2008-11-05\\CS 677}
|
||||
\rfoot{\thepage}
|
||||
|
||||
\begin{document}
|
||||
@ -21,54 +21,31 @@
|
||||
\noindent
|
||||
\begin{enumerate}
|
||||
\item[1.]{
|
||||
Break the ``parallel region'' into a function accepting a \texttt{void *}
|
||||
parameter.
|
||||
Before the ``parallel region'' create a \texttt{for} loop which loops
|
||||
\textit{n} times (where \textit{n} is the number of threads),
|
||||
invoking \texttt{pthread\_create()} once for each thread.
|
||||
Any variables local to the function containing the ``parallel region''
|
||||
that the ``parallel region'' function needs access to
|
||||
would have to be stored as pointers in a structure whose address was
|
||||
passed as an argument to the thread function.
|
||||
Then, the thread would run the code in the ``parallel region''.
|
||||
After the region, a \texttt{for} loop would exist to loop over all
|
||||
the threads created in the first loop and execute \texttt{pthread\_join()}
|
||||
for each one.
|
||||
The best known sequential sorting algorithms have a complexity of $O (n \log n)$.
|
||||
So, the speedup factor is given by
|
||||
$$ s = \frac{T_s}{T_p} = \frac{n \log n}{cn} = \frac{\log n}{c} $$
|
||||
}
|
||||
|
||||
\vskip 2em
|
||||
\item[2.]{
|
||||
Each thread could store its result into an array indexed by its ID.
|
||||
Then, when computation is complete, a regular \texttt{for} loop
|
||||
within an OpenMP parallel region could iterate
|
||||
$\lceil \log_2 n \rceil$ times.
|
||||
In the first iteration, threads where $ID \bmod 2 = 0$ would perform
|
||||
the reduction operation on their array value and the array value
|
||||
at index $ID + 1$ while the rest of the threads are idle.
|
||||
In the second iteration, threads where $ID \bmod 4 = 0$ would perform
|
||||
the reduction operation on their array value and the array value
|
||||
at index $ID + 2$ while the rest of the threads are idle.
|
||||
This process would repeat (doubling the mod value and offset index
|
||||
each time) until the reduction operation has been
|
||||
performed to produce the final result value at index 0 of the
|
||||
array.
|
||||
The total processing time when the program is run on $p$ processors
|
||||
will be given by the initialization phase plus the compute phase
|
||||
divided by $p$ processors.
|
||||
So, the speedup is given by
|
||||
$$ s = \frac{T_s}{T_p} = \frac{n + n^3}{n + \frac{n^3}{p}} $$
|
||||
}
|
||||
|
||||
\vskip 2em
|
||||
\item[3.]{
|
||||
My OpenMP solution to Floyd's algorithm was implemented by
|
||||
using a \texttt{\#pragma omp parallel for} on the second \texttt{for}
|
||||
loop of the algorithm.
|
||||
Thus, for each $k$ value, the rows are broken up for different
|
||||
threads to process.
|
||||
The same thread computes an entire row of the matrix.
|
||||
Using Amdahl's law, the maximum speedup is $1/f$, where $f$ is the
|
||||
serial fraction of execution time.
|
||||
So, the maximum fraction of execution time a program can spend on
|
||||
serial code if the parallel version must achieve a speedup
|
||||
factor of 10 is 10\%.
|
||||
}
|
||||
|
||||
The run times nicely grow exponentially as $n$ grows linearly.
|
||||
On eos24, with $n \geq 400$, the speedup was $\approx 3.6$.
|
||||
|
||||
As the number of threads increased, the run time decreased
|
||||
exponentially until $t > 4$, where more threads did not gain
|
||||
anything since there were only 4 processing cores.
|
||||
\vskip 1em
|
||||
\item[4.]{
|
||||
Using Gustafson's law, the scaled speedup factor is given by
|
||||
$$ S_G = p + (1 - p) T_s = 8 + (1 - 8) \frac{1}{24} = 7.708 $$
|
||||
}
|
||||
|
||||
\end{enumerate}
|
||||
|
17
cs677/hw5/src/Makefile
Normal file
17
cs677/hw5/src/Makefile
Normal file
@ -0,0 +1,17 @@
|
||||
|
||||
# Builds the adjacency-matrix generator and both Floyd implementations.
TARGETS := gen_adj_matrix
TARGETS += floyd-sequential
TARGETS += floyd-parallel

# -fopenmp is passed both when compiling the C++ sources (implicit rule)
# and when linking (explicit rule below).
CXXFLAGS := -fopenmp
#CXXFLAGS += -DPRINT_RESULT

OBJS := $(addsuffix .o,$(TARGETS))

# 'all' and 'clean' are commands, not files.
.PHONY: all clean

all: $(TARGETS)

# Static pattern rule: each program depends only on its own object file,
# so touching one source no longer relinks every target.
$(TARGETS): %: %.o
	$(CXX) -o $@ $< $(CXXFLAGS)

clean:
	-rm -f *~ *.o $(TARGETS)
|
126
cs677/hw5/src/floyd-parallel.cc
Normal file
126
cs677/hw5/src/floyd-parallel.cc
Normal file
@ -0,0 +1,126 @@
|
||||
|
||||
/* Josh Holtrop
|
||||
* 2008-10-15
|
||||
* CS 677
|
||||
* Grand Valley State University
|
||||
*/
|
||||
|
||||
#include <omp.h>
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
#include <limits.h>
|
||||
#include <stdio.h>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include <sys/time.h> /* gettimeofday(), struct timeval */
|
||||
using namespace std;
|
||||
|
||||
void usage(char * progname);
|
||||
int readFile(char * fileName, vector<int> & v);
|
||||
void convertToMatrix(const vector<int> & v, int num_verts, int * vals);
|
||||
|
||||
/*
 * Print a one-line usage summary to standard output, then terminate the
 * process with exit status 42.
 *
 * progname: text shown as the program name (main passes argv[0]).
 */
void usage(char * progname)
{
    cout << "Usage: ";
    cout << progname;
    cout << " <adjacency-file>" << endl;
    exit(42);
}
|
||||
|
||||
int main(int argc, char * argv[])
|
||||
{
|
||||
if (argc < 1)
|
||||
usage(argv[0]);
|
||||
|
||||
vector<int> v;
|
||||
|
||||
int num_verts = readFile(argv[1], v);
|
||||
int D[2][num_verts][num_verts];
|
||||
convertToMatrix(v, num_verts, (int *) &D[0]);
|
||||
|
||||
struct timeval before, after;
|
||||
gettimeofday(&before, NULL); /* Start timing */
|
||||
|
||||
/* Run Floyd's Algorithm on D */
|
||||
for (int k = 1; k <= num_verts; k++)
|
||||
{
|
||||
#pragma omp parallel for
|
||||
for (int i = 0; i < num_verts; i++)
|
||||
{
|
||||
for (int j = 0; j < num_verts; j++)
|
||||
{
|
||||
int distWithoutK = D[(k-1) & 1][i][j];
|
||||
int distItoK = D[(k-1) & 1][i][k-1];
|
||||
int distKtoJ = D[(k-1) & 1][k-1][j];
|
||||
int distWithK =
|
||||
(distItoK == INT_MAX || distKtoJ == INT_MAX)
|
||||
? INT_MAX
|
||||
: distItoK + distKtoJ;
|
||||
D[k & 1][i][j] = min(
|
||||
distWithoutK,
|
||||
distWithK
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
gettimeofday(&after, NULL); /* Stop timing */
|
||||
|
||||
#ifdef PRINT_RESULT
|
||||
cout << "Result:" << endl;
|
||||
/* Print out the final matrix */
|
||||
for (int i = 0; i < num_verts; i++)
|
||||
{
|
||||
for (int j = 0; j < num_verts; j++)
|
||||
{
|
||||
if (D[num_verts & 1][i][j] == INT_MAX)
|
||||
printf("-- ");
|
||||
else
|
||||
printf("%2d ", D[num_verts & 1][i][j]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
double time_before = before.tv_sec + before.tv_usec / 1000000.0;
|
||||
double time_after = after.tv_sec + after.tv_usec / 1000000.0;
|
||||
double diff = time_after - time_before;
|
||||
cout << "Elapsed time: " << diff << " seconds." << endl;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Append every whitespace-separated integer found in fileName to v.
 *
 * Returns the integer part of sqrt(v.size()), which the caller uses as the
 * dimension of the square matrix, or -1 (after printing a message to
 * stderr) when the file cannot be opened.  Reading stops at end of file or
 * at the first token that is not an integer.
 */
int readFile(char * fileName, vector<int> & v)
{
    ifstream in(fileName);
    if (!in.is_open())
    {
        cerr << "Error opening " << fileName << endl;
        return -1;
    }

    /* Test the extraction itself rather than eof(): eof() is already set
     * after successfully reading a final value that has no trailing
     * whitespace (which would drop that value), and is never set after a
     * malformed token (which would loop forever). */
    int weight;
    while (in >> weight)
        v.push_back(weight);

    return (int) sqrt((double) v.size());
}
|
||||
|
||||
/*
 * Copy the first num_verts * num_verts weights of v into vals, row-major,
 * replacing every 0 weight with INT_MAX.
 *
 * v:         flat list of weights in row-major order.
 * num_verts: matrix dimension; values <= 0 write nothing.
 * vals:      destination with room for num_verts * num_verts ints.
 *
 * If v holds fewer than num_verts * num_verts values, the missing cells are
 * treated like 0 weights (stored as INT_MAX) instead of reading past the
 * end of v.
 */
void convertToMatrix(const vector<int> & v, int num_verts, int * vals)
{
    const size_t n = (num_verts > 0) ? (size_t) num_verts : 0;
    const size_t cells = n * n;

    /* Plain flat indexing: the original cast to a pointer-to-VLA, which is
     * a compiler extension in C++, addressed exactly this layout. */
    for (size_t idx = 0; idx < cells; idx++)
    {
        int weight = (idx < v.size()) ? v[idx] : 0;
        vals[idx] = (weight == 0) ? INT_MAX : weight;
    }
}
|
125
cs677/hw5/src/floyd-sequential.cc
Normal file
125
cs677/hw5/src/floyd-sequential.cc
Normal file
@ -0,0 +1,125 @@
|
||||
|
||||
/* Josh Holtrop
|
||||
* 2008-10-15
|
||||
* CS 677
|
||||
* Grand Valley State University
|
||||
*/
|
||||
|
||||
#include <omp.h>
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
#include <limits.h>
|
||||
#include <stdio.h>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include <sys/time.h> /* gettimeofday(), struct timeval */
|
||||
using namespace std;
|
||||
|
||||
void usage(char * progname);
|
||||
int readFile(char * fileName, vector<int> & v);
|
||||
void convertToMatrix(const vector<int> & v, int num_verts, int * vals);
|
||||
|
||||
/*
 * Print a one-line usage summary to standard output, then terminate the
 * process with exit status 42.
 *
 * progname: text shown as the program name (main passes argv[0]).
 */
void usage(char * progname)
{
    cout << "Usage: ";
    cout << progname;
    cout << " <adjacency-file>" << endl;
    exit(42);
}
|
||||
|
||||
int main(int argc, char * argv[])
|
||||
{
|
||||
if (argc < 1)
|
||||
usage(argv[0]);
|
||||
|
||||
vector<int> v;
|
||||
|
||||
int num_verts = readFile(argv[1], v);
|
||||
int D[2][num_verts][num_verts];
|
||||
convertToMatrix(v, num_verts, (int *) &D[0]);
|
||||
|
||||
struct timeval before, after;
|
||||
gettimeofday(&before, NULL); /* Start timing */
|
||||
|
||||
/* Run Floyd's Algorithm on D */
|
||||
for (int k = 1; k <= num_verts; k++)
|
||||
{
|
||||
for (int i = 0; i < num_verts; i++)
|
||||
{
|
||||
for (int j = 0; j < num_verts; j++)
|
||||
{
|
||||
int distWithoutK = D[(k-1) & 1][i][j];
|
||||
int distItoK = D[(k-1) & 1][i][k-1];
|
||||
int distKtoJ = D[(k-1) & 1][k-1][j];
|
||||
int distWithK =
|
||||
(distItoK == INT_MAX || distKtoJ == INT_MAX)
|
||||
? INT_MAX
|
||||
: distItoK + distKtoJ;
|
||||
D[k & 1][i][j] = min(
|
||||
distWithoutK,
|
||||
distWithK
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
gettimeofday(&after, NULL); /* Stop timing */
|
||||
|
||||
#ifdef PRINT_RESULT
|
||||
cout << "Result:" << endl;
|
||||
/* Print out the final matrix */
|
||||
for (int i = 0; i < num_verts; i++)
|
||||
{
|
||||
for (int j = 0; j < num_verts; j++)
|
||||
{
|
||||
if (D[num_verts & 1][i][j] == INT_MAX)
|
||||
printf("-- ");
|
||||
else
|
||||
printf("%2d ", D[num_verts & 1][i][j]);
|
||||
}
|
||||
printf("\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
double time_before = before.tv_sec + before.tv_usec / 1000000.0;
|
||||
double time_after = after.tv_sec + after.tv_usec / 1000000.0;
|
||||
double diff = time_after - time_before;
|
||||
cout << "Elapsed time: " << diff << " seconds." << endl;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Append every whitespace-separated integer found in fileName to v.
 *
 * Returns the integer part of sqrt(v.size()), which the caller uses as the
 * dimension of the square matrix, or -1 (after printing a message to
 * stderr) when the file cannot be opened.  Reading stops at end of file or
 * at the first token that is not an integer.
 */
int readFile(char * fileName, vector<int> & v)
{
    ifstream in(fileName);
    if (!in.is_open())
    {
        cerr << "Error opening " << fileName << endl;
        return -1;
    }

    /* Test the extraction itself rather than eof(): eof() is already set
     * after successfully reading a final value that has no trailing
     * whitespace (which would drop that value), and is never set after a
     * malformed token (which would loop forever). */
    int weight;
    while (in >> weight)
        v.push_back(weight);

    return (int) sqrt((double) v.size());
}
|
||||
|
||||
/*
 * Copy the first num_verts * num_verts weights of v into vals, row-major,
 * replacing every 0 weight with INT_MAX.
 *
 * v:         flat list of weights in row-major order.
 * num_verts: matrix dimension; values <= 0 write nothing.
 * vals:      destination with room for num_verts * num_verts ints.
 *
 * If v holds fewer than num_verts * num_verts values, the missing cells are
 * treated like 0 weights (stored as INT_MAX) instead of reading past the
 * end of v.
 */
void convertToMatrix(const vector<int> & v, int num_verts, int * vals)
{
    const size_t n = (num_verts > 0) ? (size_t) num_verts : 0;
    const size_t cells = n * n;

    /* Plain flat indexing: the original cast to a pointer-to-VLA, which is
     * a compiler extension in C++, addressed exactly this layout. */
    for (size_t idx = 0; idx < cells; idx++)
    {
        int weight = (idx < v.size()) ? v[idx] : 0;
        vals[idx] = (weight == 0) ? INT_MAX : weight;
    }
}
|
39
cs677/hw5/src/gen_adj_matrix.c
Normal file
39
cs677/hw5/src/gen_adj_matrix.c
Normal file
@ -0,0 +1,39 @@
|
||||
// gen_adj_matrix.c
|
||||
// generates random adjacency matrix of desired size and connectivity
|
||||
// gw
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
// Writes a random numVertices x numVertices adjacency matrix to
// "adjacency.dat" as space-separated integers.
//
// argv[1] = numVertices (must be a positive integer)
// argv[2] = avgConnectivity (must be a non-negative integer); an
//           off-diagonal cell becomes an edge when
//           rand() % numVertices < avgConnectivity.
//
// Edges get a weight in 1..20, every other cell (including the diagonal) is
// written as 0.  rand() is never seeded here, so the same arguments produce
// the same file on a given C library.
//
// Returns 0 on success; exits with -1 on bad arguments or file errors.
int main(int argc, char *argv[])
{
    long i, j;
    FILE *fp;
    long numVertices, avgConnectivity;
    char *endVertices, *endConnectivity;

    // get parameters
    if (argc != 3) {
        printf ("usage: progName numVertices avgConnectivity\n");
        exit(-1);
    }

    // strtol (unlike atoi) lets us reject empty or non-numeric arguments
    numVertices = strtol(argv[1], &endVertices, 10);
    avgConnectivity = strtol(argv[2], &endConnectivity, 10);
    if (endVertices == argv[1] || *endVertices != '\0' ||
        endConnectivity == argv[2] || *endConnectivity != '\0' ||
        numVertices < 1 || avgConnectivity < 0) {
        printf ("usage: progName numVertices avgConnectivity\n");
        exit(-1);
    }

    // open/create output file
    if ((fp = fopen ("adjacency.dat", "w")) == NULL) {
        printf ("could not create file\n");
        exit(-1);
    }

    // generate random graph/matrix
    // (rand() is always called once per cell, and a second time only for
    // edges, so the generated sequence matches the original program)
    for (i = 0; i < numVertices; i++) {
        for (j = 0; j < numVertices; j++) {
            if (((rand() % numVertices) < avgConnectivity) && i != j)
                fprintf (fp, "%d ", 1 + (rand() % 20));
            else
                fprintf (fp, "%d ", 0);
        }
    }

    // buffered write errors may only show up in ferror() or at fclose()
    if (ferror (fp)) {
        fclose (fp);
        printf ("could not write file\n");
        exit(-1);
    }
    if (fclose (fp) != 0) {
        printf ("could not write file\n");
        exit(-1);
    }
    return 0;
}
|
14
cs677/hw5/src/time-for-n.pl
Executable file
14
cs677/hw5/src/time-for-n.pl
Executable file
@ -0,0 +1,14 @@
|
||||
#!/usr/bin/perl

# Benchmark driver: for n = 100, 200, ..., 1000 generate a random
# adjacency matrix with gen_adj_matrix (connectivity 6) and run both Floyd
# implementations on the resulting adjacency.dat.

use strict;
use warnings;

# Echo a command line, run it without a shell, and stop the whole benchmark
# if it fails, so a broken step cannot leave the later programs timing a
# stale adjacency.dat.
sub run_step
{
    my @cmd = @_;
    print("@cmd\n");
    system(@cmd) == 0
        or die "command failed (wait status $?): @cmd\n";
}

for (my $n = 100; $n <= 1000; $n += 100)
{
    run_step('./gen_adj_matrix', $n, 6);
    run_step('./floyd-sequential', 'adjacency.dat');
    run_step('./floyd-parallel', 'adjacency.dat');
}
|
Loading…
x
Reference in New Issue
Block a user