From 0c6614e89e3e428aaa9d7cffc8d54f56b3b2ffb9 Mon Sep 17 00:00:00 2001
From: josh <josh@45c1a28c-8058-47b2-ae61-ca45b979098e>
Date: Sat, 1 Nov 2008 15:34:45 +0000
Subject: [PATCH] copied hw7/hw.tex to hw5/

git-svn-id: svn://anubis/gvsu@226 45c1a28c-8058-47b2-ae61-ca45b979098e
---
 cs677/hw5/hw.tex | 76 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 76 insertions(+)
 create mode 100644 cs677/hw5/hw.tex

diff --git a/cs677/hw5/hw.tex b/cs677/hw5/hw.tex
new file mode 100644
index 0000000..5416898
--- /dev/null
+++ b/cs677/hw5/hw.tex
@@ -0,0 +1,76 @@
+% Preamble
+\documentclass[11pt,fleqn]{article}
+\usepackage{amsmath, amsthm, amssymb}
+\usepackage{fancyhdr}
+\oddsidemargin	-0.25in
+\textwidth	6.75in
+\topmargin	-0.5in
+\headheight	0.75in
+\headsep	0.25in
+\textheight	8.75in
+\pagestyle{fancy}
+\renewcommand{\headrulewidth}{0pt}
+\renewcommand{\footrulewidth}{0pt}
+\fancyhf{}
+\lhead{HW Chap.~7\\\ \\\ }
+\rhead{Josh Holtrop\\2008-10-15\\CS 677}
+\rfoot{\thepage}
+
+\begin{document}
+
+\noindent
+\begin{enumerate}
+\item[1.]{
+    Break the ``parallel region'' into a function accepting a \texttt{void *}
+    parameter.
+    Before the ``parallel region'' create a \texttt{for} loop which loops
+    $n$ times (where $n$ is the number of threads),
+    invoking \texttt{pthread\_create()} once for each thread.
+    Any variables local to the function containing the ``parallel region''
+    that the ``parallel region'' function needs access to
+    would have to be stored as pointers in a structure whose address was
+    passed as an argument to the thread function.
+    Then, the thread would run the code in the ``parallel region''.
+    After the region, a \texttt{for} loop would exist to loop over all
+    the threads created in the first loop and execute \texttt{pthread\_join()}
+    for each one.
+}
+
+\vskip 2em
+\item[2.]{
+    Each thread could store its result into an array indexed by its ID.
+    Then, when computation is complete, a regular \texttt{for} loop
+    within an OpenMP parallel region could iterate
+    $\lceil \log_2 n \rceil$ times.
+    In the first iteration, threads where $\mathit{ID} \bmod 2 = 0$ would perform
+    the reduction operation on their array value and the array value
+    at index $\mathit{ID} + 1$ while the rest of the threads are idle.
+    In the second iteration, threads where $\mathit{ID} \bmod 4 = 0$ would perform
+    the reduction operation on their array value and the array value
+    at index $\mathit{ID} + 2$ while the rest of the threads are idle.
+    This process would repeat (doubling the mod value and offset index
+    each time) until the reduction operation has been
+    performed to produce the final result value at index 0 of the
+    array.
+}
+
+\vskip 2em
+\item[3.]{
+    My OpenMP solution to Floyd's algorithm was implemented by
+    using a \texttt{\#pragma omp parallel for} on the second \texttt{for}
+    loop of the algorithm.
+    Thus, for each $k$ value, the rows are broken up for different
+    threads to process.
+    The same thread computes an entire row of the matrix.
+
+    The run times grow roughly cubically as $n$ grows linearly, consistent with the $\Theta(n^3)$ cost of Floyd's algorithm.
+    On eos24, with $n \geq 400$, the speedup was $\approx 3.6$.
+
+    As the number of threads $t$ increased, the run time decreased
+    roughly in proportion to $1/t$ until $t > 4$, where more threads did not gain
+    anything since there were only 4 processing cores.
+}
+
+\end{enumerate}
+
+\end{document}