updating hw.tex, added src from hw7
git-svn-id: svn://anubis/gvsu@227 45c1a28c-8058-47b2-ae61-ca45b979098e
This commit is contained in:
parent
0c6614e89e
commit
446b8464b3
@ -12,8 +12,8 @@
|
|||||||
\renewcommand{\headrulewidth}{0pt}
|
\renewcommand{\headrulewidth}{0pt}
|
||||||
\renewcommand{\footrulewidth}{0pt}
|
\renewcommand{\footrulewidth}{0pt}
|
||||||
\fancyhf{}
|
\fancyhf{}
|
||||||
\lhead{HW Chap. 7\\\ \\\ }
|
\lhead{HW Chap. 5\\\ \\\ }
|
||||||
\rhead{Josh Holtrop\\2008-10-15\\CS 677}
|
\rhead{Josh Holtrop\\2008-11-05\\CS 677}
|
||||||
\rfoot{\thepage}
|
\rfoot{\thepage}
|
||||||
|
|
||||||
\begin{document}
|
\begin{document}
|
||||||
@ -21,54 +21,31 @@
|
|||||||
\noindent
|
\noindent
|
||||||
\begin{enumerate}
|
\begin{enumerate}
|
||||||
\item[1.]{
|
\item[1.]{
|
||||||
Break the ``parallel region'' into a function accepting a \texttt{void *}
|
The best known sequential sorting algorithms have a complexity of $O (n \log n)$.
|
||||||
parameter.
|
So, the speedup factor is given by
|
||||||
Before the ``parallel region'' create a \texttt{for} loop which loops
|
$$ s = \frac{T_s}{T_p} = \frac{n \log n}{cn} = \frac{\log n}{c} $$
|
||||||
\textit{n} times (where \textit{n} is the number of threads),
|
|
||||||
invoking \texttt{pthread\_create()} once for each thread.
|
|
||||||
Any variables local to the function containing the ``parallel region''
|
|
||||||
that the ``parallel region'' function needs access to
|
|
||||||
would have to be stored as pointers in a structure whose address was
|
|
||||||
passed as an argument to the thread function.
|
|
||||||
Then, the thread would run the code in the ``parallel region''.
|
|
||||||
After the region, a \texttt{for} loop would exist to loop over all
|
|
||||||
the threads created in the first loop and execute \texttt{pthread\_join()}
|
|
||||||
for each one.
|
|
||||||
}
|
}
|
||||||
|
|
||||||
\vskip 2em
|
|
||||||
\item[2.]{
|
\item[2.]{
|
||||||
Each thread could store its result into an array indexed by its ID.
|
The total processing time when the program is run on $p$ processors
|
||||||
Then, when computation is complete, a regular \texttt{for} loop
|
will be given by the initialization phase plus the compute phase
|
||||||
within an OpenMP parallel region could iterate
|
divided by $p$ processors.
|
||||||
$\lceil \log_2 n \rceil$ times.
|
So, the speedup is given by
|
||||||
In the first iteration, threads where $ID \bmod 2 = 0$ would perform
|
$$ s = \frac{T_s}{T_p} = \frac{n + n^3}{n + \frac{n^3}{p}} $$
|
||||||
the reduction operation on their array value and the array value
|
|
||||||
at index $ID + 1$ while the rest of the threads are idle.
|
|
||||||
In the second iteration, threads where $ID \bmod 4 = 0$ would perform
|
|
||||||
the reduction operation on their array value and the array value
|
|
||||||
at index $ID + 2$ while the rest of the threads are idle.
|
|
||||||
This process would repeat (doubling the mod value and offset index
|
|
||||||
each time) until the reduction operation has been
|
|
||||||
performed to produce the final result value at index 0 of the
|
|
||||||
array.
|
|
||||||
}
|
}
|
||||||
|
|
||||||
\vskip 2em
|
|
||||||
\item[3.]{
|
\item[3.]{
|
||||||
My OpenMP solution to Floyd's algorithm was implemented by
|
Using Amdahl's law, the maximum speedup is $1/f$, where $f$ is the
|
||||||
using a \texttt{\#pragma omp parallel for} on the second \texttt{for}
|
serial fraction of execution time.
|
||||||
loop of the algorithm.
|
So, the maximum fraction of execution time a program can spend on
|
||||||
Thus, for each $k$ value, the rows are broken up for different
|
serial code if the parallel version must achieve a speedup
|
||||||
threads to process.
|
factor of 10 is 10\%.
|
||||||
The same thread computes an entire row of the matrix.
|
}
|
||||||
|
|
||||||
The run times nicely grow exponentially as $n$ grows linearly.
|
\vskip 1em
|
||||||
On eos24, with $n \geq 400$, the speedup was $\approx 3.6$.
|
\item[4.]{
|
||||||
|
Using Gustafson's law, the scaled speedup factor is given by
|
||||||
As the number of threads increased, the run time decreased
|
$$ S_G = p + (1 - p) f = 8 + (1 - 8) \frac{1}{24} \approx 7.708 $$
|
||||||
exponentially until $t > 4$, where more threads did not gain
|
|
||||||
anything since there were only 4 processing cores.
|
|
||||||
}
|
}
|
||||||
|
|
||||||
\end{enumerate}
|
\end{enumerate}
|
||||||
|
17
cs677/hw5/src/Makefile
Normal file
17
cs677/hw5/src/Makefile
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
|
||||||
|
# Programs built by this Makefile; each is linked from the single object
# file that shares its name.
TARGETS := gen_adj_matrix
TARGETS += floyd-sequential
TARGETS += floyd-parallel

# -fopenmp is used both when compiling the C++ sources and when linking.
CXXFLAGS := -fopenmp
# Uncomment to define PRINT_RESULT when compiling.
#CXXFLAGS += -DPRINT_RESULT

OBJS := $(addsuffix .o,$(TARGETS))

.PHONY: all clean

all: $(TARGETS)

# Static pattern rule: each program depends only on its own object file,
# so changing one source no longer relinks every program.
$(TARGETS): %: %.o
	$(CXX) -o $@ $< $(CXXFLAGS)

clean:
	-rm -f *~ *.o $(TARGETS)
|
126
cs677/hw5/src/floyd-parallel.cc
Normal file
126
cs677/hw5/src/floyd-parallel.cc
Normal file
@ -0,0 +1,126 @@
|
|||||||
|
|
||||||
|
/* Josh Holtrop
|
||||||
|
* 2008-10-15
|
||||||
|
* CS 677
|
||||||
|
* Grand Valley State University
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <omp.h>
|
||||||
|
#include <math.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <limits.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <iostream>
|
||||||
|
#include <fstream>
|
||||||
|
#include <vector>
|
||||||
|
#include <sys/time.h> /* gettimeofday(), struct timeval */
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
void usage(char * progname);
|
||||||
|
int readFile(char * fileName, vector<int> & v);
|
||||||
|
void convertToMatrix(const vector<int> & v, int num_verts, int * vals);
|
||||||
|
|
||||||
|
/* Print the command-line synopsis to standard output, then exit with
 * status 42. This function does not return. */
void usage(char * progname)
{
    cout << "Usage: ";
    cout << progname;
    cout << " <adjacency-file>";
    cout << endl;
    exit(42);
}
|
||||||
|
|
||||||
|
int main(int argc, char * argv[])
|
||||||
|
{
|
||||||
|
if (argc < 1)
|
||||||
|
usage(argv[0]);
|
||||||
|
|
||||||
|
vector<int> v;
|
||||||
|
|
||||||
|
int num_verts = readFile(argv[1], v);
|
||||||
|
int D[2][num_verts][num_verts];
|
||||||
|
convertToMatrix(v, num_verts, (int *) &D[0]);
|
||||||
|
|
||||||
|
struct timeval before, after;
|
||||||
|
gettimeofday(&before, NULL); /* Start timing */
|
||||||
|
|
||||||
|
/* Run Floyd's Algorithm on D */
|
||||||
|
for (int k = 1; k <= num_verts; k++)
|
||||||
|
{
|
||||||
|
#pragma omp parallel for
|
||||||
|
for (int i = 0; i < num_verts; i++)
|
||||||
|
{
|
||||||
|
for (int j = 0; j < num_verts; j++)
|
||||||
|
{
|
||||||
|
int distWithoutK = D[(k-1) & 1][i][j];
|
||||||
|
int distItoK = D[(k-1) & 1][i][k-1];
|
||||||
|
int distKtoJ = D[(k-1) & 1][k-1][j];
|
||||||
|
int distWithK =
|
||||||
|
(distItoK == INT_MAX || distKtoJ == INT_MAX)
|
||||||
|
? INT_MAX
|
||||||
|
: distItoK + distKtoJ;
|
||||||
|
D[k & 1][i][j] = min(
|
||||||
|
distWithoutK,
|
||||||
|
distWithK
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
gettimeofday(&after, NULL); /* Stop timing */
|
||||||
|
|
||||||
|
#ifdef PRINT_RESULT
|
||||||
|
cout << "Result:" << endl;
|
||||||
|
/* Print out the final matrix */
|
||||||
|
for (int i = 0; i < num_verts; i++)
|
||||||
|
{
|
||||||
|
for (int j = 0; j < num_verts; j++)
|
||||||
|
{
|
||||||
|
if (D[num_verts & 1][i][j] == INT_MAX)
|
||||||
|
printf("-- ");
|
||||||
|
else
|
||||||
|
printf("%2d ", D[num_verts & 1][i][j]);
|
||||||
|
}
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
double time_before = before.tv_sec + before.tv_usec / 1000000.0;
|
||||||
|
double time_after = after.tv_sec + after.tv_usec / 1000000.0;
|
||||||
|
double diff = time_after - time_before;
|
||||||
|
cout << "Elapsed time: " << diff << " seconds." << endl;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Read whitespace-separated integer weights from fileName and append them
 * to v.
 *
 * Returns the integer part of the square root of v.size() after reading
 * (the dimension of the square matrix when v was empty on entry), or -1
 * after printing a message to cerr if the file cannot be opened.
 */
int readFile(char * fileName, vector<int> & v)
{
    ifstream in(fileName);
    if (!in.is_open())
    {
        cerr << "Error opening " << fileName << endl;
        return -1;
    }

    /* Loop on the success of each extraction instead of testing eof():
     * a malformed token now ends the read instead of looping forever, and
     * a final number that is not followed by whitespace is still kept. */
    int weight;
    while (in >> weight)
        v.push_back(weight);

    return (int) sqrt((double) v.size());
}
|
||||||
|
|
||||||
|
/*
 * Copy the first num_verts * num_verts entries of v, in row-major order,
 * into the int array starting at vals. A weight of 0 is stored as INT_MAX,
 * the value main() treats as "no path".
 */
void convertToMatrix(const vector<int> & v, int num_verts, int * vals)
{
    for (int row = 0; row < num_verts; row++)
    {
        const int base = row * num_verts;
        for (int col = 0; col < num_verts; col++)
        {
            const int weight = v[base + col];
            vals[base + col] = (weight == 0) ? INT_MAX : weight;
        }
    }
}
|
125
cs677/hw5/src/floyd-sequential.cc
Normal file
125
cs677/hw5/src/floyd-sequential.cc
Normal file
@ -0,0 +1,125 @@
|
|||||||
|
|
||||||
|
/* Josh Holtrop
|
||||||
|
* 2008-10-15
|
||||||
|
* CS 677
|
||||||
|
* Grand Valley State University
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <omp.h>
|
||||||
|
#include <math.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <limits.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <iostream>
|
||||||
|
#include <fstream>
|
||||||
|
#include <vector>
|
||||||
|
#include <sys/time.h> /* gettimeofday(), struct timeval */
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
void usage(char * progname);
|
||||||
|
int readFile(char * fileName, vector<int> & v);
|
||||||
|
void convertToMatrix(const vector<int> & v, int num_verts, int * vals);
|
||||||
|
|
||||||
|
/* Print the command-line synopsis to standard output, then exit with
 * status 42. This function does not return. */
void usage(char * progname)
{
    cout << "Usage: ";
    cout << progname;
    cout << " <adjacency-file>";
    cout << endl;
    exit(42);
}
|
||||||
|
|
||||||
|
int main(int argc, char * argv[])
|
||||||
|
{
|
||||||
|
if (argc < 1)
|
||||||
|
usage(argv[0]);
|
||||||
|
|
||||||
|
vector<int> v;
|
||||||
|
|
||||||
|
int num_verts = readFile(argv[1], v);
|
||||||
|
int D[2][num_verts][num_verts];
|
||||||
|
convertToMatrix(v, num_verts, (int *) &D[0]);
|
||||||
|
|
||||||
|
struct timeval before, after;
|
||||||
|
gettimeofday(&before, NULL); /* Start timing */
|
||||||
|
|
||||||
|
/* Run Floyd's Algorithm on D */
|
||||||
|
for (int k = 1; k <= num_verts; k++)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < num_verts; i++)
|
||||||
|
{
|
||||||
|
for (int j = 0; j < num_verts; j++)
|
||||||
|
{
|
||||||
|
int distWithoutK = D[(k-1) & 1][i][j];
|
||||||
|
int distItoK = D[(k-1) & 1][i][k-1];
|
||||||
|
int distKtoJ = D[(k-1) & 1][k-1][j];
|
||||||
|
int distWithK =
|
||||||
|
(distItoK == INT_MAX || distKtoJ == INT_MAX)
|
||||||
|
? INT_MAX
|
||||||
|
: distItoK + distKtoJ;
|
||||||
|
D[k & 1][i][j] = min(
|
||||||
|
distWithoutK,
|
||||||
|
distWithK
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
gettimeofday(&after, NULL); /* Stop timing */
|
||||||
|
|
||||||
|
#ifdef PRINT_RESULT
|
||||||
|
cout << "Result:" << endl;
|
||||||
|
/* Print out the final matrix */
|
||||||
|
for (int i = 0; i < num_verts; i++)
|
||||||
|
{
|
||||||
|
for (int j = 0; j < num_verts; j++)
|
||||||
|
{
|
||||||
|
if (D[num_verts & 1][i][j] == INT_MAX)
|
||||||
|
printf("-- ");
|
||||||
|
else
|
||||||
|
printf("%2d ", D[num_verts & 1][i][j]);
|
||||||
|
}
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
double time_before = before.tv_sec + before.tv_usec / 1000000.0;
|
||||||
|
double time_after = after.tv_sec + after.tv_usec / 1000000.0;
|
||||||
|
double diff = time_after - time_before;
|
||||||
|
cout << "Elapsed time: " << diff << " seconds." << endl;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Read whitespace-separated integer weights from fileName and append them
 * to v.
 *
 * Returns the integer part of the square root of v.size() after reading
 * (the dimension of the square matrix when v was empty on entry), or -1
 * after printing a message to cerr if the file cannot be opened.
 */
int readFile(char * fileName, vector<int> & v)
{
    ifstream in(fileName);
    if (!in.is_open())
    {
        cerr << "Error opening " << fileName << endl;
        return -1;
    }

    /* Loop on the success of each extraction instead of testing eof():
     * a malformed token now ends the read instead of looping forever, and
     * a final number that is not followed by whitespace is still kept. */
    int weight;
    while (in >> weight)
        v.push_back(weight);

    return (int) sqrt((double) v.size());
}
|
||||||
|
|
||||||
|
/*
 * Copy the first num_verts * num_verts entries of v, in row-major order,
 * into the int array starting at vals. A weight of 0 is stored as INT_MAX,
 * the value main() treats as "no path".
 */
void convertToMatrix(const vector<int> & v, int num_verts, int * vals)
{
    for (int row = 0; row < num_verts; row++)
    {
        const int base = row * num_verts;
        for (int col = 0; col < num_verts; col++)
        {
            const int weight = v[base + col];
            vals[base + col] = (weight == 0) ? INT_MAX : weight;
        }
    }
}
|
39
cs677/hw5/src/gen_adj_matrix.c
Normal file
39
cs677/hw5/src/gen_adj_matrix.c
Normal file
@ -0,0 +1,39 @@
|
|||||||
|
// gen_adj_matrix.c
|
||||||
|
// generates random adjacency matrix of desired size and connectivity
|
||||||
|
// gw
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
// Parse text as a non-negative decimal integer that fits in an int.
// Returns 0 and stores the value in *out on success, -1 otherwise.
static int parse_count(const char *text, int *out)
{
    char *end;
    long value = strtol(text, &end, 10);

    // Reject empty input, trailing junk, negatives, and values that do not
    // survive a round trip through int.
    if (end == text || *end != '\0' || value < 0 || value != (long)(int)value)
        return -1;
    *out = (int)value;
    return 0;
}

// Writes a numVertices x numVertices adjacency matrix to "adjacency.dat"
// as space-separated integers. An off-diagonal entry becomes a weight in
// 1..20 when (rand() % numVertices) < avgConnectivity; every other entry,
// including the whole diagonal, is 0. rand() is never seeded here.
// Exits with status -1 on bad arguments or file errors.
int main(int argc, char *argv[])
{
    int i, j;
    FILE *fp;
    int numVertices, avgConnectivity;

    // get parameters
    if (argc != 3
        || parse_count(argv[1], &numVertices) != 0
        || parse_count(argv[2], &avgConnectivity) != 0) {
        fprintf(stderr, "usage: progName numVertices avgConnectivity\n");
        exit(-1);
    }

    // open/create output file
    if ((fp = fopen("adjacency.dat", "w")) == NULL) {
        fprintf(stderr, "could not create file\n");
        exit(-1);
    }

    // generate random graph/matrix
    for (i = 0; i < numVertices; i++) {
        for (j = 0; j < numVertices; j++) {
            // rand() is consulted before the diagonal test, so diagonal
            // cells still consume one random number.
            if (((rand() % numVertices) < avgConnectivity) && i != j)
                fprintf(fp, "%d ", 1 + (rand() % 20));
            else
                fprintf(fp, "%d ", 0);
        }
    }

    // fclose flushes buffered output; a failure here means the file may
    // be incomplete.
    if (fclose(fp) != 0) {
        fprintf(stderr, "error writing adjacency.dat\n");
        exit(-1);
    }
    return 0;
}
|
14
cs677/hw5/src/time-for-n.pl
Executable file
14
cs677/hw5/src/time-for-n.pl
Executable file
@ -0,0 +1,14 @@
|
|||||||
|
#!/usr/bin/perl
|
||||||
|
|
||||||
|
use strict;
|
||||||
|
use warnings;
|
||||||
|
|
||||||
|
# For n = 100, 200, ..., 1000: generate a random graph with n vertices and
# run both Floyd programs on the resulting adjacency.dat. Each command
# line is echoed before it runs.
for (my $n = 100; $n <= 1000; $n += 100)
{
    my @commands = (
        ['./gen_adj_matrix', $n, 6],
        ['./floyd-sequential', 'adjacency.dat'],
        ['./floyd-parallel', 'adjacency.dat'],
    );
    foreach my $cmd (@commands)
    {
        print("@$cmd\n");
        # Stop at the first failure so that a failed generator run is not
        # followed by timings taken on a stale adjacency.dat.
        system(@$cmd) == 0
            or die("Command '@$cmd' failed (status $?)\n");
    }
}
|
Loading…
x
Reference in New Issue
Block a user