#include "camera.h"

#include "ray.h"
#include "rng.h"

#include <float.h>
#include <math.h>
#include <stdatomic.h>
#include <stdio.h>
#include <threads.h>

/*
 * Fallback definition of pi for toolchains whose <math.h> does not provide
 * M_PI (it is a POSIX extension, not part of ISO C).
 */
#ifndef M_PI
#define M_PI 3.14159265358979323846264
#endif

/* Maximum number of scattering events trace() follows before giving up. */
#define MAX_ITER 10
/*
 * Lower bound handed to every object's intersect callback in trace();
 * presumably the smallest accepted hit distance, so that a scattered ray does
 * not immediately re-hit the surface it left -- confirm against the
 * intersect implementations.
 */
#define MIN_T 1e-6
/*
 * Standard deviation passed to rng_gaussian_xy() for per-sample jitter; the
 * result scales the camera's x_step/y_step, i.e. it is in units of one pixel.
 */
#define SAMPLE_STDDEV 0.333

/* Gamma used by linear_to_gamma(): channels are raised to 1 / GAMMA. */
#define GAMMA 2.2

/* Number of worker threads camera_render() splits the image rows across. */
#define NTHREADS 20

/*
 * Work description handed to one render_thread() instance: which rows of the
 * image to render, what to render them with, and where to report progress.
 */
typedef struct {
	/* Camera supplying the pixel grid, eye position and sample count. */
	const camera_t *camera;
	/* Scene the rays are traced against. */
	const scene_t *scene;
	/* Start of the whole image's pixel buffer (not of this slice). */
	pix_t *pixels;
	/* Random number generator state used only by this slice. */
	rng_t rng;
	/* First image row rendered by this slice. */
	unsigned start_y;
	/* Number of consecutive rows rendered, starting at start_y. */
	unsigned row_count;
	/* Shared counter, incremented once for every completed row. */
	atomic_uint *progress;
} work_slice_t;

/* Reference "up" direction used by camera_init() to build the camera basis. */
static const vec3_t up = { 0.0, 1.0, 0.0 };

/*
 * Colour constants: white is the initial attenuation in trace() and one end of
 * the background blend; black is returned for absorbed or over-long paths and
 * is the starting value of each pixel's sample accumulator.
 */
static const vec3_t white = { 1.0, 1.0, 1.0 };
static const vec3_t black = { 0.0, 0.0, 0.0 };

/*
 * Traces one ray through the scene and returns the colour it gathers.
 *
 * The ray is followed for at most MAX_ITER scattering events.  On every
 * iteration each object's intersect callback is tried with the current best
 * hit distance as its upper bound, and the material of the last object that
 * reported a hit is kept.  When nothing is hit, the accumulated attenuation
 * is multiplied by a background that blends white into the scene's sky colour
 * according to the y component of the ray direction.  A material whose
 * scatter callback returns false, or a path that uses up all MAX_ITER
 * iterations, yields black.
 *
 * ray:   ray to trace; the scatter callback overwrites it for the next bounce.
 * scene: objects and sky colour to trace against.
 * rng:   random number generator state forwarded to the scatter callback.
 */
static vec3_t trace(ray_t ray, const scene_t *scene, rng_t *rng)
{
	vec3_t throughput = white;

	for (unsigned bounce = 0; bounce < MAX_ITER; ++bounce) {
		hit_t nearest = { .t = DBL_MAX };
		material_t surface = {};

		for (unsigned k = 0; k < scene->obj_count; ++k) {
			const obj_t *candidate = scene->objs + k;
			if (candidate->intersect(
			        candidate->params, ray, &nearest, MIN_T, nearest.t))
				surface = candidate->material;
		}

		if (nearest.t != DBL_MAX) {
			/* Something was hit: let its material pick the next ray. */
			vec3_t atten;
			if (!surface.scatter(surface.params, nearest, rng, &ray, &atten))
				return black;
			throughput = vec3_hadamard(throughput, atten);
			continue;
		}

		/* The hit distance is untouched, so the ray escaped to the sky. */
		const double blend = (ray.dir.y + 1.0) / 2.0;
		const vec3_t sky_part = vec3_scale(scene->sky_colour, blend);
		const vec3_t white_part = vec3_scale(white, 1 - blend);
		return vec3_hadamard(throughput, vec3_add(sky_part, white_part));
	}

	/* Bounce budget exhausted without reaching the sky. */
	return black;
}

static double linear_to_gamma(double channel)
{
	return pow(fmin(channel, 1.0), 1.0 / GAMMA);
}

/*
 * Writes a linear colour into one output pixel.
 *
 * Each of the x/y/z components is gamma-encoded with linear_to_gamma(),
 * scaled by UINT16_MAX and stored in r/g/b respectively; alpha is set to
 * UINT16_MAX.
 *
 * col: linear colour to store.
 * out: destination pixel.
 */
static void setpix(vec3_t col, pix_t *out)
{
	const double full_scale = UINT16_MAX;

	out->a = UINT16_MAX;
	out->r = full_scale * linear_to_gamma(col.x);
	out->g = full_scale * linear_to_gamma(col.y);
	out->b = full_scale * linear_to_gamma(col.z);
}

/*
 * Thread entry point: renders the rows described by one work_slice_t.
 *
 * For every pixel in rows [start_y, start_y + row_count) the camera's
 * samples_per_pixel rays are traced, each through a point offset from the
 * pixel position by a Gaussian jitter, and their equally weighted average is
 * written to the image buffer.  The shared progress counter is incremented
 * after each completed row.
 *
 * arg: pointer to the work_slice_t for this thread.
 * Returns 0.
 */
static int render_thread(void *arg)
{
	work_slice_t *const job = arg;
	const camera_t *const cam = job->camera;
	const uint32_t width = cam->img_width;
	const double weight = 1.0 / (double)cam->samples_per_pixel;
	const unsigned end_y = job->start_y + job->row_count;

	for (unsigned row = job->start_y; row < end_y; ++row) {
		const vec3_t row_origin
		    = vec3_add(cam->pix_origin, vec3_scale(cam->y_step, row));

		for (unsigned col = 0; col < width; ++col) {
			const vec3_t centre
			    = vec3_add(row_origin, vec3_scale(cam->x_step, col));
			vec3_t sum = black;

			for (unsigned s = 0; s < cam->samples_per_pixel; ++s) {
				/* Jitter first, then trace: both draw from the same rng. */
				const vec3_t jitter
				    = rng_gaussian_xy(&job->rng, SAMPLE_STDDEV);
				const vec3_t dx = vec3_scale(cam->x_step, jitter.x);
				const vec3_t dy = vec3_scale(cam->y_step, jitter.y);
				const vec3_t through = vec3_add(centre, vec3_add(dx, dy));

				const ray_t primary = {
					.orig = cam->pos,
					.dir = vec3_unit(vec3_sub(through, cam->pos)),
				};
				const vec3_t radiance
				    = trace(primary, job->scene, &job->rng);

				sum = vec3_add(sum, vec3_scale(radiance, weight));
			}

			pix_t *const dst = job->pixels + (width * row + col);
			setpix(sum, dst);
		}

		/* One more row finished; camera_render() polls this counter. */
		atomic_fetch_add(job->progress, 1);
	}

	return 0;
}

/*
 * Builds a camera located at `pos` and looking towards `target`.
 *
 * The image plane sits at unit distance along the view direction.  Its height
 * is derived from `fov`, its width from the image aspect ratio, and it is
 * divided into img_width x img_height cells; pix_origin is the centre of the
 * top-left cell and x_step / y_step move one cell right / down.
 *
 * pos:               eye position.
 * target:            point the camera looks at; must differ from pos.
 * fov:               vertical field of view in degrees.
 * img_width:         image width in pixels.
 * img_height:        image height in pixels.
 * samples_per_pixel: number of rays traced per pixel by the renderer.
 *
 * Returns the initialised camera by value.
 */
camera_t camera_init(
    vec3_t pos, vec3_t target, double fov, uint32_t img_width,
    uint32_t img_height, unsigned samples_per_pixel)
{
	const double fov_rad = M_PI * fov / 180.0;
	const double aspect = (double)img_width / (double)img_height;
	/*
	 * At unit distance the plane extends tan(fov / 2) above and below the
	 * view axis, so its full height is twice that.
	 */
	const double viewport_height = 2.0 * tan(fov_rad / 2);
	const double viewport_width = viewport_height * aspect;

	const vec3_t w_hat = vec3_unit(vec3_sub(target, pos));

	/*
	 * When looking straight up or down, up x w_hat vanishes and normalising
	 * it would yield a NaN basis; use the z axis as the reference instead.
	 */
	vec3_t right = vec3_cross(up, w_hat);
	const double right_len_sq
	    = right.x * right.x + right.y * right.y + right.z * right.z;
	if (right_len_sq < 1e-12) {
		const vec3_t alt_up = { .x = 0.0, .y = 0.0, .z = 1.0 };
		right = vec3_cross(alt_up, w_hat);
	}
	const vec3_t u_hat = vec3_unit(right);
	const vec3_t v_hat = vec3_unit(vec3_cross(w_hat, u_hat));

	/* v is negated so that increasing image y moves down the viewport. */
	const vec3_t u = vec3_scale(u_hat, viewport_width);
	const vec3_t v = vec3_scale(v_hat, -viewport_height);

	const vec3_t topleft
	    = vec3_sub(vec3_add(pos, w_hat), vec3_scale(vec3_add(u, v), 0.5));

	const vec3_t x_step = vec3_scale(u, 1.0 / (double)img_width);
	const vec3_t y_step = vec3_scale(v, 1.0 / (double)img_height);
	/* Shift by half a cell so samples are centred on pixels. */
	const vec3_t pix_origin
	    = vec3_add(topleft, vec3_scale(vec3_add(x_step, y_step), 0.5));

	return (camera_t) {
		.pos = pos,
		.pix_origin = pix_origin,
		.x_step = x_step,
		.y_step = y_step,
		.img_width = img_width,
		.img_height = img_height,
		.samples_per_pixel = samples_per_pixel,
	};
}

/*
 * Renders `scene` as seen by `camera` into `img_out`.
 *
 * The image rows are split into NTHREADS contiguous slices (the last slice
 * also takes the remainder rows), each rendered by render_thread() with its
 * own random number generator seeded by the slice index.  While the workers
 * run, a "[done/total]" row counter is rewritten on stderr every 50 ms.
 *
 * If a worker thread cannot be created, its slice is rendered on the calling
 * thread instead, so the image is always completed and only threads that
 * were actually started are joined.
 *
 * camera:  camera to render with.
 * scene:   scene to render.
 * img_out: receives the image size in w/h; img_out->pix is not allocated
 *          here and must already point to storage for at least
 *          img_width * img_height pixels.
 */
void camera_render(
    const camera_t *camera, const scene_t *scene, img_t *img_out)
{
	img_out->w = camera->img_width;
	img_out->h = camera->img_height;

	const unsigned rows = camera->img_height;
	const unsigned rows_per_thread = rows / NTHREADS;
	const unsigned rem_rows = rows % NTHREADS;

	atomic_uint progress = 0;

	thrd_t threads[NTHREADS];
	unsigned spawned = 0;
	work_slice_t slices[NTHREADS];
	for (unsigned i = 0; i < NTHREADS; ++i) {
		slices[i].camera = camera;
		slices[i].scene = scene;
		slices[i].pixels = img_out->pix;
		slices[i].rng = rng_init(i);

		slices[i].start_y = i * rows_per_thread;
		slices[i].row_count = rows_per_thread;
		if (rem_rows != 0 && i == NTHREADS - 1)
			slices[i].row_count += rem_rows;

		slices[i].progress = &progress;

		/*
		 * Handles of started threads are packed at the front of
		 * `threads`.  A slice whose thread could not be created is
		 * rendered right here; otherwise its rows would never be
		 * counted and the progress loop below would spin forever.
		 */
		if (thrd_create(threads + spawned, render_thread, slices + i)
		    == thrd_success)
			++spawned;
		else
			render_thread(slices + i);
	}

	/* Field width for the counter: number of decimal digits in `rows`. */
	int digits = 1;
	for (unsigned r = rows; r >= 10; r /= 10)
		++digits;

	unsigned done;
	do {
		thrd_sleep(&(struct timespec) { .tv_nsec = 50'000'000 }, nullptr);
		done = atomic_load(&progress);
		fprintf(stderr, "\r[%*u/%u]", digits, done, rows);
		fflush(stderr);
	} while (done < rows);

	for (unsigned i = 0; i < spawned; ++i)
		thrd_join(threads[i], nullptr);
}