From 0c6614e89e3e428aaa9d7cffc8d54f56b3b2ffb9 Mon Sep 17 00:00:00 2001 From: josh Date: Sat, 1 Nov 2008 15:34:45 +0000 Subject: [PATCH] copied hw7/hw.tex to hw5/ git-svn-id: svn://anubis/gvsu@226 45c1a28c-8058-47b2-ae61-ca45b979098e --- cs677/hw5/hw.tex | 76 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 cs677/hw5/hw.tex diff --git a/cs677/hw5/hw.tex b/cs677/hw5/hw.tex new file mode 100644 index 0000000..5416898 --- /dev/null +++ b/cs677/hw5/hw.tex @@ -0,0 +1,76 @@ +% Preamble +\documentclass[11pt,fleqn]{article} +\usepackage{amsmath, amsthm, amssymb} +\usepackage{fancyhdr} +\oddsidemargin -0.25in +\textwidth 6.75in +\topmargin -0.5in +\headheight 0.75in +\headsep 0.25in +\textheight 8.75in +\pagestyle{fancy} +\renewcommand{\headrulewidth}{0pt} +\renewcommand{\footrulewidth}{0pt} +\fancyhf{} +\lhead{HW Chap. 7\\\ \\\ } +\rhead{Josh Holtrop\\2008-10-15\\CS 677} +\rfoot{\thepage} + +\begin{document} + +\noindent +\begin{enumerate} +\item[1.]{ + Break the ``parallel region'' into a function accepting a \texttt{void *} + parameter. + Before the ``parallel region'' create a \texttt{for} loop which loops + \textit{n} times (where \textit{n} is the number of threads), + invoking \texttt{pthread\_create()} once for each thread. + Any variables local to the function containing the ``parallel region'' + that the ``parallel region'' function needs access to + would have to be stored as pointers in a structure whose address was + passed as an argument to the thread function. + Then, the thread would run the code in the ``parallel region''. + After the region, a \texttt{for} loop would exist to loop over all + the threads created in the first loop and execute \texttt{pthread\_join()} + for each one. +} + +\vskip 2em +\item[2.]{ + Each thread could store its result into an array indexed by its ID. 
+ Then, when computation is complete, a regular \texttt{for} loop + within an OpenMP parallel region could iterate + $\lceil \log_2 n \rceil$ times. + In the first iteration, threads where $\mathit{ID} \bmod 2 = 0$ would perform + the reduction operation on their array value and the array value + at index $\mathit{ID} + 1$ while the rest of the threads are idle. + In the second iteration, threads where $\mathit{ID} \bmod 4 = 0$ would perform + the reduction operation on their array value and the array value + at index $\mathit{ID} + 2$ while the rest of the threads are idle. + This process would repeat (doubling the mod value and offset index + each time) until the reduction operation has been + performed to produce the final result value at index 0 of the + array. +} + +\vskip 2em +\item[3.]{ + My OpenMP solution to Floyd's algorithm was implemented by + using a \texttt{\#pragma omp parallel for} on the second \texttt{for} + loop of the algorithm. + Thus, for each $k$ value, the rows are broken up for different + threads to process. + The same thread computes an entire row of the matrix. + + The run times nicely grow exponentially as $n$ grows linearly. + On eos24, with $n \geq 400$, the speedup was $\approx 3.6$. + + As the number of threads increased, the run time decreased + exponentially until $t > 4$, where more threads did not gain + anything since there were only 4 processing cores. +} + +\end{enumerate} + +\end{document}