Render multithreaded locally by default
This commit is contained in:
parent
aad02e522d
commit
36e12d1e90
128
src/main/fart.cc
128
src/main/fart.cc
@ -9,6 +9,10 @@
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
#include <pthread.h>
|
||||
#include <atomic>
|
||||
|
||||
#include "Scene.h"
|
||||
#include "distrib/distrib.h"
|
||||
@ -16,6 +20,106 @@
|
||||
|
||||
using namespace std;
|
||||
|
||||
/*
 * Number of contiguous pixels each worker thread claims at a time. Pulling
 * work in small chunks (rather than whole rows) keeps the load balanced even
 * when individual rows vary wildly in render cost, while keeping the claimed
 * pixels contiguous so cache locality and dispatch overhead stay close to the
 * row-based baseline. See benchmarks: 64 was the sweet spot.
 *
 * Declared as a typed, file-local constant rather than a preprocessor macro
 * so that it obeys scoping rules and is type-checked where it is used. It
 * must be positive: the dispatch cursor advances by this amount per claim.
 */
static constexpr int RENDER_CHUNK_SIZE = 64;
static_assert(RENDER_CHUNK_SIZE > 0, "RENDER_CHUNK_SIZE must be positive");
|
||||
|
||||
/*
|
||||
* State shared by the local multithreaded renderer. Worker threads pull
|
||||
* fixed-size chunks of pixels off a lock-free atomic cursor until the image
|
||||
* is complete.
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
Scene * scene;
|
||||
unsigned char * data;
|
||||
int width;
|
||||
int total_pixels;
|
||||
std::atomic<int> next_pixel; /* dispatch cursor: next unclaimed pixel */
|
||||
std::atomic<int> pixels_done; /* pixels finished, for progress */
|
||||
std::atomic<int> last_permille; /* last tenth-of-a-percent printed */
|
||||
} render_thread_state_t;
|
||||
|
||||
static void * render_thread(void * varg)
|
||||
{
|
||||
render_thread_state_t * state = (render_thread_state_t *) varg;
|
||||
const int width = state->width;
|
||||
const int total_pixels = state->total_pixels;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
/* atomically claim the next chunk of contiguous pixels */
|
||||
int start = state->next_pixel.fetch_add(RENDER_CHUNK_SIZE,
|
||||
std::memory_order_relaxed);
|
||||
if (start >= total_pixels)
|
||||
break;
|
||||
int end = start + RENDER_CHUNK_SIZE;
|
||||
if (end > total_pixels)
|
||||
end = total_pixels;
|
||||
|
||||
for (int pixel = start; pixel < end; pixel++)
|
||||
{
|
||||
int y = pixel / width;
|
||||
int x = pixel % width;
|
||||
state->scene->renderPixel(x, y, &state->data[3 * pixel]);
|
||||
}
|
||||
|
||||
/* report progress without locking: of the threads that advance the
|
||||
* tenth-of-a-percent counter, only the one that wins the update prints,
|
||||
* so the display stays monotonic and free of interleaved output */
|
||||
int done = state->pixels_done.fetch_add(end - start,
|
||||
std::memory_order_relaxed)
|
||||
+ (end - start);
|
||||
int permille = (int) (1000L * done / total_pixels);
|
||||
int prev = state->last_permille.load(std::memory_order_relaxed);
|
||||
while (permille > prev)
|
||||
{
|
||||
if (state->last_permille.compare_exchange_weak(
|
||||
prev, permille, std::memory_order_relaxed))
|
||||
{
|
||||
printf("\e[8D%2.1f%%", permille / 10.0);
|
||||
fflush(stdout);
|
||||
break;
|
||||
}
|
||||
/* prev reloaded by compare_exchange_weak; re-test permille > prev */
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
 * Render the whole image locally using one worker thread per online
 * processor.
 *
 * scene   scene to render; its renderPixel() is called from the workers
 * data    output buffer the workers write into (indexed as 3 * pixel)
 * width   image width in pixels
 * height  image height in pixels
 *
 * Blocks until every worker has finished. If some threads cannot be
 * created the render proceeds with the ones that were; if none can be
 * created the calling thread does the rendering itself.
 */
static void renderThreaded(Scene & scene,
                           unsigned char * data,
                           int width,
                           int height)
{
    /* sysconf() returns a long, and -1 when the value is unavailable */
    const long online = sysconf(_SC_NPROCESSORS_ONLN);
    const int num_threads = online < 1 ? 1 : static_cast<int>(online);

    render_thread_state_t state;
    state.scene = &scene;
    state.data = data;
    state.width = width;
    state.total_pixels = width * height;
    state.next_pixel.store(0, std::memory_order_relaxed);
    state.pixels_done.store(0, std::memory_order_relaxed);
    state.last_permille.store(0, std::memory_order_relaxed);

    /* keep only the handles of threads that were actually created, so that
     * pthread_join() is never called on an uninitialized pthread_t */
    std::vector<pthread_t> threads;
    threads.reserve(num_threads);
    for (int i = 0; i < num_threads; i++)
    {
        pthread_t tid;
        if (pthread_create(&tid, NULL, render_thread, &state) != 0)
            break; /* out of resources: make do with the workers we have */
        threads.push_back(tid);
    }

    /* no worker could be started: render on the calling thread instead */
    if (threads.empty())
        render_thread(&state);

    for (pthread_t tid : threads)
        pthread_join(tid, NULL);

    /* "\033" is the standard spelling of ESC ("\e" is a compiler extension);
     * move the cursor back over the progress display */
    printf("\033[8D");
}
|
||||
|
||||
void usage(const char * progname)
|
||||
{
|
||||
cout << "Usage: " << progname << " [options] <scene-file>" << endl;
|
||||
@ -274,28 +378,8 @@ int main(int argc, char * argv[])
|
||||
}
|
||||
else
|
||||
{
|
||||
int total_pixels = height * width;
|
||||
int total_pixels_1000 = total_pixels / 1000;
|
||||
if (total_pixels_1000 < 1)
|
||||
total_pixels_1000 = 1;
|
||||
int pixel_num = 0;
|
||||
/* "sequential" version */
|
||||
for (int i = 0; i < height; i++)
|
||||
{
|
||||
for (int j = 0; j < width; j++)
|
||||
{
|
||||
int pixel = i * width + j;
|
||||
scene.renderPixel(j, i, &data[3 * pixel]);
|
||||
pixel_num++;
|
||||
if (pixel_num % total_pixels_1000 == 0)
|
||||
{
|
||||
double pct = 100.0 * pixel_num / (double) total_pixels;
|
||||
printf("\e[8D%2.1f%%", pct);
|
||||
fflush(stdout);
|
||||
}
|
||||
}
|
||||
}
|
||||
printf("\e[8D");
|
||||
/* local multithreaded render using all available cores */
|
||||
renderThreaded(scene, data, width, height);
|
||||
}
|
||||
|
||||
gettimeofday(&after, NULL); /* stop timing */
|
||||
|
||||
@ -3,6 +3,7 @@
|
||||
#define REFPTR_H REFPTR_H
|
||||
|
||||
#include <stdlib.h> /* NULL */
|
||||
#include <atomic> /* std::atomic */
|
||||
|
||||
template <typename T>
|
||||
class refptr
|
||||
@ -25,7 +26,10 @@ class refptr
|
||||
void destroy();
|
||||
|
||||
T * m_ptr;
|
||||
int * m_refCount;
|
||||
/* reference count is atomic so that refptr copies may be made
|
||||
* concurrently from multiple threads (e.g. the multithreaded
|
||||
* renderer) without corrupting the count */
|
||||
std::atomic<int> * m_refCount;
|
||||
};
|
||||
|
||||
template <typename T> refptr<T>::refptr()
|
||||
@ -37,8 +41,7 @@ template <typename T> refptr<T>::refptr()
|
||||
template <typename T> refptr<T>::refptr(T * ptr)
|
||||
{
|
||||
m_ptr = ptr;
|
||||
m_refCount = new int;
|
||||
*m_refCount = 1;
|
||||
m_refCount = new std::atomic<int>(1);
|
||||
}
|
||||
|
||||
template <typename T> refptr<T>::refptr(const refptr<T> & orig)
|
||||
@ -57,8 +60,7 @@ template <typename T> refptr<T> & refptr<T>::operator=(T * ptr)
|
||||
{
|
||||
destroy();
|
||||
m_ptr = ptr;
|
||||
m_refCount = new int;
|
||||
*m_refCount = 1;
|
||||
m_refCount = new std::atomic<int>(1);
|
||||
return *this;
|
||||
}
|
||||
|
||||
@ -67,7 +69,7 @@ template <typename T> void refptr<T>::cloneFrom(const refptr<T> & orig)
|
||||
this->m_ptr = orig.m_ptr;
|
||||
this->m_refCount = orig.m_refCount;
|
||||
if (m_refCount != NULL)
|
||||
(*m_refCount)++;
|
||||
m_refCount->fetch_add(1, std::memory_order_relaxed);
|
||||
}
|
||||
|
||||
template <typename T> refptr<T>::~refptr()
|
||||
@ -79,15 +81,13 @@ template <typename T> void refptr<T>::destroy()
|
||||
{
|
||||
if (m_refCount != NULL)
|
||||
{
|
||||
if (*m_refCount <= 1)
|
||||
/* fetch_sub returns the value prior to the decrement; if it was 1
|
||||
* then this was the last reference and we own the cleanup */
|
||||
if (m_refCount->fetch_sub(1, std::memory_order_acq_rel) == 1)
|
||||
{
|
||||
delete m_ptr;
|
||||
delete m_refCount;
|
||||
}
|
||||
else
|
||||
{
|
||||
(*m_refCount)--;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user