Render multithreaded locally by default
This commit is contained in:
parent
aad02e522d
commit
36e12d1e90
128
src/main/fart.cc
128
src/main/fart.cc
@ -9,6 +9,10 @@
|
|||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <map>
|
#include <map>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include <pthread.h>
|
||||||
|
#include <atomic>
|
||||||
|
|
||||||
#include "Scene.h"
|
#include "Scene.h"
|
||||||
#include "distrib/distrib.h"
|
#include "distrib/distrib.h"
|
||||||
@ -16,6 +20,106 @@
|
|||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
|
/*
 * Number of contiguous pixels each worker thread claims at a time. Pulling
 * work in small chunks (rather than whole rows) keeps the load balanced even
 * when individual rows vary wildly in render cost, while keeping the claimed
 * pixels contiguous so cache locality and dispatch overhead stay close to the
 * row-based baseline. See benchmarks: 64 was the sweet spot.
 *
 * Declared as a typed, file-local constant rather than a preprocessor macro
 * so that it obeys normal scoping rules and is visible to the debugger.
 */
static constexpr int RENDER_CHUNK_SIZE = 64;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* State shared by the local multithreaded renderer. Worker threads pull
|
||||||
|
* fixed-size chunks of pixels off a lock-free atomic cursor until the image
|
||||||
|
* is complete.
|
||||||
|
*/
|
||||||
|
typedef struct
|
||||||
|
{
|
||||||
|
Scene * scene;
|
||||||
|
unsigned char * data;
|
||||||
|
int width;
|
||||||
|
int total_pixels;
|
||||||
|
std::atomic<int> next_pixel; /* dispatch cursor: next unclaimed pixel */
|
||||||
|
std::atomic<int> pixels_done; /* pixels finished, for progress */
|
||||||
|
std::atomic<int> last_permille; /* last tenth-of-a-percent printed */
|
||||||
|
} render_thread_state_t;
|
||||||
|
|
||||||
|
static void * render_thread(void * varg)
|
||||||
|
{
|
||||||
|
render_thread_state_t * state = (render_thread_state_t *) varg;
|
||||||
|
const int width = state->width;
|
||||||
|
const int total_pixels = state->total_pixels;
|
||||||
|
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
/* atomically claim the next chunk of contiguous pixels */
|
||||||
|
int start = state->next_pixel.fetch_add(RENDER_CHUNK_SIZE,
|
||||||
|
std::memory_order_relaxed);
|
||||||
|
if (start >= total_pixels)
|
||||||
|
break;
|
||||||
|
int end = start + RENDER_CHUNK_SIZE;
|
||||||
|
if (end > total_pixels)
|
||||||
|
end = total_pixels;
|
||||||
|
|
||||||
|
for (int pixel = start; pixel < end; pixel++)
|
||||||
|
{
|
||||||
|
int y = pixel / width;
|
||||||
|
int x = pixel % width;
|
||||||
|
state->scene->renderPixel(x, y, &state->data[3 * pixel]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* report progress without locking: of the threads that advance the
|
||||||
|
* tenth-of-a-percent counter, only the one that wins the update prints,
|
||||||
|
* so the display stays monotonic and free of interleaved output */
|
||||||
|
int done = state->pixels_done.fetch_add(end - start,
|
||||||
|
std::memory_order_relaxed)
|
||||||
|
+ (end - start);
|
||||||
|
int permille = (int) (1000L * done / total_pixels);
|
||||||
|
int prev = state->last_permille.load(std::memory_order_relaxed);
|
||||||
|
while (permille > prev)
|
||||||
|
{
|
||||||
|
if (state->last_permille.compare_exchange_weak(
|
||||||
|
prev, permille, std::memory_order_relaxed))
|
||||||
|
{
|
||||||
|
printf("\e[8D%2.1f%%", permille / 10.0);
|
||||||
|
fflush(stdout);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
/* prev reloaded by compare_exchange_weak; re-test permille > prev */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Render the whole image locally using one worker thread per online
 * processor.
 *
 *   scene  - scene to render; its renderPixel() is called from the workers
 *   data   - output buffer, written at 3 bytes per pixel
 *   width  - image width in pixels
 *   height - image height in pixels
 *
 * Blocks until every pixel has been rendered. If some worker threads cannot
 * be created the image is rendered with however many were started; if none
 * could be started the calling thread renders the image itself.
 */
static void renderThreaded(Scene & scene,
                           unsigned char * data,
                           int width,
                           int height)
{
    /* sysconf() returns long and -1 on error, so test it before narrowing */
    const long online = sysconf(_SC_NPROCESSORS_ONLN);
    const int num_threads = (online < 1) ? 1 : (int) online;

    render_thread_state_t state;
    state.scene = &scene;
    state.data = data;
    state.width = width;
    state.total_pixels = width * height;
    state.next_pixel.store(0, std::memory_order_relaxed);
    state.pixels_done.store(0, std::memory_order_relaxed);
    state.last_permille.store(0, std::memory_order_relaxed);

    /* only handles of successfully created threads are recorded, so that
     * pthread_join() is never called on an uninitialized pthread_t */
    std::vector<pthread_t> threads;
    threads.reserve(num_threads);
    for (int i = 0; i < num_threads; i++)
    {
        pthread_t tid;
        if (pthread_create(&tid, NULL, render_thread, &state) != 0)
            break; /* out of resources: make do with the workers we have */
        threads.push_back(tid);
    }

    /* the workers pull from a shared cursor, so any number of them finishes
     * the image; with no workers at all, do the work on this thread */
    if (threads.empty())
        render_thread(&state);

    for (size_t i = 0; i < threads.size(); i++)
        pthread_join(threads[i], NULL);

    printf("\e[8D");
}
|
||||||
|
|
||||||
void usage(const char * progname)
|
void usage(const char * progname)
|
||||||
{
|
{
|
||||||
cout << "Usage: " << progname << " [options] <scene-file>" << endl;
|
cout << "Usage: " << progname << " [options] <scene-file>" << endl;
|
||||||
@ -274,28 +378,8 @@ int main(int argc, char * argv[])
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
int total_pixels = height * width;
|
/* local multithreaded render using all available cores */
|
||||||
int total_pixels_1000 = total_pixels / 1000;
|
renderThreaded(scene, data, width, height);
|
||||||
if (total_pixels_1000 < 1)
|
|
||||||
total_pixels_1000 = 1;
|
|
||||||
int pixel_num = 0;
|
|
||||||
/* "sequential" version */
|
|
||||||
for (int i = 0; i < height; i++)
|
|
||||||
{
|
|
||||||
for (int j = 0; j < width; j++)
|
|
||||||
{
|
|
||||||
int pixel = i * width + j;
|
|
||||||
scene.renderPixel(j, i, &data[3 * pixel]);
|
|
||||||
pixel_num++;
|
|
||||||
if (pixel_num % total_pixels_1000 == 0)
|
|
||||||
{
|
|
||||||
double pct = 100.0 * pixel_num / (double) total_pixels;
|
|
||||||
printf("\e[8D%2.1f%%", pct);
|
|
||||||
fflush(stdout);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
printf("\e[8D");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
gettimeofday(&after, NULL); /* stop timing */
|
gettimeofday(&after, NULL); /* stop timing */
|
||||||
|
|||||||
@ -3,6 +3,7 @@
|
|||||||
#define REFPTR_H REFPTR_H
|
#define REFPTR_H REFPTR_H
|
||||||
|
|
||||||
#include <stdlib.h> /* NULL */
|
#include <stdlib.h> /* NULL */
|
||||||
|
#include <atomic> /* std::atomic */
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
class refptr
|
class refptr
|
||||||
@ -25,7 +26,10 @@ class refptr
|
|||||||
void destroy();
|
void destroy();
|
||||||
|
|
||||||
T * m_ptr;
|
T * m_ptr;
|
||||||
int * m_refCount;
|
/* reference count is atomic so that refptr copies may be made
|
||||||
|
* concurrently from multiple threads (e.g. the multithreaded
|
||||||
|
* renderer) without corrupting the count */
|
||||||
|
std::atomic<int> * m_refCount;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename T> refptr<T>::refptr()
|
template <typename T> refptr<T>::refptr()
|
||||||
@ -37,8 +41,7 @@ template <typename T> refptr<T>::refptr()
|
|||||||
template <typename T> refptr<T>::refptr(T * ptr)
|
template <typename T> refptr<T>::refptr(T * ptr)
|
||||||
{
|
{
|
||||||
m_ptr = ptr;
|
m_ptr = ptr;
|
||||||
m_refCount = new int;
|
m_refCount = new std::atomic<int>(1);
|
||||||
*m_refCount = 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T> refptr<T>::refptr(const refptr<T> & orig)
|
template <typename T> refptr<T>::refptr(const refptr<T> & orig)
|
||||||
@ -57,8 +60,7 @@ template <typename T> refptr<T> & refptr<T>::operator=(T * ptr)
|
|||||||
{
|
{
|
||||||
destroy();
|
destroy();
|
||||||
m_ptr = ptr;
|
m_ptr = ptr;
|
||||||
m_refCount = new int;
|
m_refCount = new std::atomic<int>(1);
|
||||||
*m_refCount = 1;
|
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -67,7 +69,7 @@ template <typename T> void refptr<T>::cloneFrom(const refptr<T> & orig)
|
|||||||
this->m_ptr = orig.m_ptr;
|
this->m_ptr = orig.m_ptr;
|
||||||
this->m_refCount = orig.m_refCount;
|
this->m_refCount = orig.m_refCount;
|
||||||
if (m_refCount != NULL)
|
if (m_refCount != NULL)
|
||||||
(*m_refCount)++;
|
m_refCount->fetch_add(1, std::memory_order_relaxed);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T> refptr<T>::~refptr()
|
template <typename T> refptr<T>::~refptr()
|
||||||
@ -79,15 +81,13 @@ template <typename T> void refptr<T>::destroy()
|
|||||||
{
|
{
|
||||||
if (m_refCount != NULL)
|
if (m_refCount != NULL)
|
||||||
{
|
{
|
||||||
if (*m_refCount <= 1)
|
/* fetch_sub returns the value prior to the decrement; if it was 1
|
||||||
|
* then this was the last reference and we own the cleanup */
|
||||||
|
if (m_refCount->fetch_sub(1, std::memory_order_acq_rel) == 1)
|
||||||
{
|
{
|
||||||
delete m_ptr;
|
delete m_ptr;
|
||||||
delete m_refCount;
|
delete m_refCount;
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
(*m_refCount)--;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user