/*
 * <<< mp_raytrace.c >>>
 *
 * --- Sample application for isis 'ray trace' - for MPI
 *     Copyright (C) 2000-2003 Amano Lab., Keio University. ---
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
 */

#include <ctype.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <mpi.h>

/* default image width and height, used when no -x/-y/size argument overrides
 * them */
#define DEFAULT_XSIZE 64
#define DEFAULT_YSIZE 64

/* eye point and target point of the camera.
 * EYE_ANGLE is given in degrees; initialize() converts it to radians and
 * uses its sine to scale the screen axes. */
#define EYE_X     -30
#define EYE_Y     -10
#define EYE_Z      25
#define TARGET_X    0
#define TARGET_Y    0
#define TARGET_Z    0
#define EYE_ANGLE  20

/* ball definition (position:0,0,0 specular:white).
 * BALLCOLOR_* are the diffuse/ambient colour components used by calc(). */
#define BALL_RADIUS 5
#define BALLCOLOR_R 0.5
#define BALLCOLOR_G 0.5
#define BALLCOLOR_B 0.9

/* floor: the plane z = FLOOR_Z, i.e. one ball radius below the origin.
 * Two colours are alternated by calc(); floor x/y coordinates are multiplied
 * by INV_FLOOR_DENSITY before being truncated to pick the colour. */
#define FLOOR_Z           (-BALL_RADIUS)
#define FLOORCOLOR1_R     1.0
#define FLOORCOLOR1_G     1.0
#define FLOORCOLOR1_B     0.5
#define FLOORCOLOR2_R     0.5
#define FLOORCOLOR2_G     1.0
#define FLOORCOLOR2_B     0.5
#define INV_FLOOR_DENSITY 0.1

/* parallel light (color:white); direction vector, normalized by
 * initialize() into light_vec */
#define LIGHT_X  1
#define LIGHT_Y -1
#define LIGHT_Z -1

/* ambient (color:white); factor applied to the surface colours */
#define AMBIENT .7

/* 3-component vector and RGB colour, both stored as doubles */
typedef struct { double x, y, z; } vector;
typedef struct { double r, g, b; } color;

/* forward declarations of the file-local functions defined below */
static void show_help(void);
static int fix_number_to_exp2n(int);
static void output(unsigned char*, int, int);
static void initialize(void);
static void calc(color*, double, double);
static void render(unsigned char*, unsigned char*, int, int, int, int);

/* global constants: written once by initialize(), then only read by calc() */
/* floor colours: "dark" is the colour scaled by AMBIENT only, "bright" adds
 * the light contribution on top of it */
static color floorcolor1_bright;
static color floorcolor1_dark;
static color floorcolor2_bright;
static color floorcolor2_dark;
/* screen basis: z is the eye-to-target vector, x and y span the image plane */
static vector screen_x_vec, screen_y_vec, screen_z_vec;
/* normalized light direction */
static vector light_vec;
/* precomputed terms of the quadratics solved in calc(): coef_c for the
 * eye-ray/ball test, scoef_* for the floor-point/ball test along light_vec */
static double coef_c, scoef_b_base_2, scoef_c_base;

/*
 * Print the command-line usage summary to stdout, one line per option.
 */
void show_help(void)
{
	static const char *const usage_lines[] = {
		"usage: raytrace [options] [size]",
		"options:",
		"  -x<n>  set image size of x-axis.",
		"  -y<n>  set image size of y-axis.",
		"  -h     print this message.",
		"  -i     print timing statistics.",
		"  -o     output rendered image to stdout.",
		"  -v     verbosely output."
	};
	const size_t line_count = sizeof usage_lines / sizeof usage_lines[0];
	size_t k;

	for (k = 0; k < line_count; k++) {
		puts(usage_lines[k]);
	}
}

/*
 * Round x down to a power of two.
 *
 * Returns the largest power of two that is less than or equal to x.
 * Zero and negative inputs return 0; previously a negative input could
 * loop forever because an arithmetic right shift never reaches zero.
 */
int fix_number_to_exp2n(int x)
{
	int result = 1;

	if (x <= 0) {
		return 0;
	}
	/* each halving of x corresponds to one doubling of the result */
	while (x > 1) {
		x >>= 1;
		result <<= 1;
	}
	return result;
}

/* Write the low 16 bits of v to stdout in little-endian byte order. */
static void put_le16(unsigned int v)
{
	putchar((int)(v & 0xff));
	putchar((int)((v >> 8) & 0xff));
}

/* Write the low 32 bits of v to stdout in little-endian byte order. */
static void put_le32(unsigned long v)
{
	putchar((int)(v & 0xff));
	putchar((int)((v >> 8) & 0xff));
	putchar((int)((v >> 16) & 0xff));
	putchar((int)((v >> 24) & 0xff));
}

/*
 * Write an image to stdout as an uncompressed 24-bit BMP file.
 *
 * a       row-major RGB pixels, 3 bytes per pixel, x_size * y_size pixels;
 *         row 0 is the top of the picture
 * x_size  image width in pixels
 * y_size  image height in pixels
 *
 * The file-size and image-size header fields are computed from the actual
 * dimensions (they used to be fixed constants), and every row is padded to
 * a multiple of four bytes as the format requires. Nothing is written when
 * a is NULL or either dimension is not positive.
 */
void output(unsigned char *a, int x_size, int y_size)
{
	enum {
		FILE_HEADER_BYTES = 14,
		INFO_HEADER_BYTES = 40,
		BYTES_PER_PIXEL = 3,
		/* same value for both axes; the old Y field read 0x6b6d, which
		 * looks like a typo of the X value 0x0b6d */
		PIXELS_PER_METER = 0x0b6d
	};
	unsigned long row_bytes, pad_bytes, image_bytes, k;
	int i, j;

	if (a == NULL || x_size <= 0 || y_size <= 0) {
		return;
	}

	row_bytes = (unsigned long)x_size * BYTES_PER_PIXEL;
	pad_bytes = (4 - row_bytes % 4) % 4;
	image_bytes = (row_bytes + pad_bytes) * (unsigned long)y_size;

	/* file header */
	putchar('B');
	putchar('M');
	put_le32(FILE_HEADER_BYTES + INFO_HEADER_BYTES + image_bytes);
	put_le32(0);                                     /* reserved */
	put_le32(FILE_HEADER_BYTES + INFO_HEADER_BYTES); /* pixel data offset */

	/* info header */
	put_le32(INFO_HEADER_BYTES);
	put_le32((unsigned long)x_size);
	put_le32((unsigned long)y_size); /* positive height: rows are bottom-up */
	put_le16(1);                     /* colour planes */
	put_le16(24);                    /* bits per pixel */
	put_le32(0);                     /* no compression */
	put_le32(image_bytes);
	put_le32(PIXELS_PER_METER);
	put_le32(PIXELS_PER_METER);
	put_le32(0);                     /* palette colours used */
	put_le32(0);                     /* important colours */

	/* pixel data: last row first, each pixel stored as B, G, R */
	for (i = y_size - 1; i >= 0; i--) {
		const unsigned char *p = a + (size_t)i * row_bytes;
		for (j = 0; j < x_size; j++) {
			putchar(p[2]);
			putchar(p[1]);
			putchar(p[0]);
			p += BYTES_PER_PIXEL;
		}
		for (k = 0; k < pad_bytes; k++) {
			putchar('\0');
		}
	}
}

/*
 * Program entry point.
 *
 * Parses the command line, checks that the image can be split evenly among
 * the MPI processes, renders the picture with render(), and on rank 0
 * optionally writes the image (-o) or timing statistics (-i) to stdout.
 *
 * Returns 0 on success and 1 when the image size cannot be distributed over
 * the available processes. Allocation failure aborts the whole MPI job.
 */
int main(int argc, char **argv)
{
	unsigned char *a = NULL; /* gathered per-rank blocks (rank 0 only) */
	unsigned char *w = NULL; /* work buffer; holds the final image on rank 0 */
	clock_t init_start_time, init_end_time, calc_start_time, calc_end_time;
	int x_size = DEFAULT_XSIZE, y_size = DEFAULT_YSIZE, punum, puid;
	int info_flag = 0, output_flag = 0, verbose_flag = 0;
	int layout_ok;

	/* initialize MPI environment */
	MPI_Init(&argc, &argv);
	MPI_Comm_size(MPI_COMM_WORLD, &punum);
	MPI_Comm_rank(MPI_COMM_WORLD, &puid);

	/* read arguments */
	while (*++argv != NULL) {
		if (**argv == '-') {
			switch (*++*argv) {
			case 'h':
				if (puid == 0) show_help();
				/* every rank must finalize MPI before leaving */
				MPI_Finalize();
				return 0;
			case 'i':
				info_flag = 1;
				break;
			case 'o':
				output_flag = 1;
				break;
			case 'v':
				verbose_flag = 1;
				break;
			case 'x':
				x_size = atoi(*argv + 1);
				/* zero would later be used as a divisor, so reject it too */
				if (x_size <= 0) x_size = DEFAULT_XSIZE;
				break;
			case 'y':
				y_size = atoi(*argv + 1);
				if (y_size <= 0) y_size = DEFAULT_YSIZE;
				break;
			default:
				break;
			}
		} else if (isdigit((unsigned char)**argv)) {
			/* total pixel count: round down to a power of two and pick
			 * power-of-two sides with x_size >= y_size */
			int size = atoi(*argv);
			if (size > 0) {
				size = fix_number_to_exp2n(size);
				x_size = 1, y_size = 1;
				while (x_size * y_size < size) {
					if (x_size == y_size) {
						x_size <<= 1;
					} else {
						y_size <<= 1;
					}
				}
			}
		}
	}
	/* the image goes to stdout, so keep text output out of it */
	if (output_flag) info_flag = verbose_flag = 0;
	init_start_time = init_end_time = calc_start_time = calc_end_time = 0;

	/*
	 * render() hands pixel k (row-major) to rank k % punum and every rank
	 * sends exactly x_size * y_size / punum pixels. That only holds when the
	 * process count tiles the image evenly; otherwise ranks would write past
	 * their buffers. All ranks evaluate the same test, so they exit together.
	 */
	if (punum <= x_size) {
		layout_ok = (x_size % punum == 0);
	} else {
		layout_ok = (punum % x_size == 0)
				 && (y_size % (punum / x_size) == 0);
	}
	/* the byte count must also fit in an int (MPI message counts are int) */
	if (layout_ok && x_size > INT_MAX / 3 / y_size) {
		layout_ok = 0;
	}
	if (!layout_ok) {
		if (puid == 0) {
			fprintf(stderr,
					"Image size %dx%d cannot be divided evenly among "
					"%d processes.\n", x_size, y_size, punum);
		}
		MPI_Finalize();
		return 1;
	}

	/* initialize */
	if (puid == 0) {
		const size_t total_bytes = (size_t)x_size * y_size * 3;
		a = malloc(total_bytes);
		w = malloc(total_bytes);
		if (a == NULL || w == NULL) {
			fputs("Out of memory.\n", stderr);
			/* abort the whole job so the other ranks do not wait forever */
			MPI_Abort(MPI_COMM_WORLD, 1);
			exit(1);
		}
	} else {
		w = malloc((size_t)x_size * y_size / punum * 3);
		if (w == NULL) {
			fputs("Out of memory.\n", stderr);
			MPI_Abort(MPI_COMM_WORLD, 1);
			exit(1);
		}
	}
	if (info_flag) init_start_time = clock();
	initialize();
	if (info_flag) init_end_time = clock();
	if (verbose_flag && puid == 0) {
		printf("size:(%d,%d)\n", x_size, y_size);
	}

	/* calculate */
	if (info_flag) calc_start_time = clock();
	render(a, w, x_size, y_size, punum, puid);
	if (info_flag) calc_end_time = clock();

	/* output */
	if (output_flag && puid == 0) {
		const int blocksize = x_size * y_size / punum;
		int i, j;
		if (punum > 1) {
			/* a holds one contiguous block per rank; scatter block i back
			 * to pixels i, i + punum, i + 2 * punum, ... of w */
			unsigned char *s = a;
			for (i = 0; i < punum; i++) {
				unsigned char *d = &w[i * 3];
				for (j = 0; j < blocksize; j++) {
					d[0] = s[0];
					d[1] = s[1];
					d[2] = s[2];
					s += 3;
					d += punum * 3;
				}
			}
		}
		/* with a single process w already is the finished image */
		output(w, x_size, y_size);
	}

	/* show timing statistics */
	if (info_flag && puid == 0) {
		printf("init start: %10ld\n"
			   "init end:   %10ld\n"
			   "calc start: %10ld\n"
			   "calc end:   %10ld\n"
			   "init time:  %10ld\n"
			   "calc time:  %10ld\n",
			   (long)init_start_time, (long)init_end_time,
			   (long)calc_start_time, (long)calc_end_time,
			   (long)(init_end_time - init_start_time),
			   (long)(calc_end_time - calc_start_time));
	}

	free(a);
	free(w);
	MPI_Finalize();
	return 0;
}

void initialize(void)
{
	/* screen vector */
	{
		vector tx, ty, tz;
		double tx_norm, ty_norm, tz_norm, screen_x_norm;
		tz.x = TARGET_X - EYE_X;
		tz.y = TARGET_Y - EYE_Y;
		tz.z = TARGET_Y - EYE_Z;
		tz_norm = sqrt(tz.x * tz.x + tz.y * tz.y + tz.z * tz.z);
		screen_x_norm = tz_norm * sin(EYE_ANGLE * 2 * M_PI / 360);
		tx.x = tz.y;
		tx.y = -tz.x;
		tx.z = 0;
		tx_norm = sqrt(tx.x * tx.x + tx.y * tx.y + tx.z * tx.z);
		ty.x = tx.y * tz.z - tx.z * tz.y;
		ty.y = tx.z * tz.x - tx.x * tz.z;
		ty.z = tx.x * tz.y - tx.y * tz.x;
		ty_norm = sqrt(ty.x * ty.x + ty.y * ty.y + ty.z * ty.z);
		screen_x_vec.x = tx.x / tx_norm * screen_x_norm;
		screen_x_vec.y = tx.y / tx_norm * screen_x_norm;
		screen_x_vec.z = tx.z / tx_norm * screen_x_norm;
		screen_y_vec.x = ty.x / ty_norm * screen_x_norm;
		screen_y_vec.y = ty.y / ty_norm * screen_x_norm;
		screen_y_vec.z = ty.z / ty_norm * screen_x_norm;
		screen_z_vec.x = tz.x;
		screen_z_vec.y = tz.y;
		screen_z_vec.z = tz.z;
	}
	/* light vector */
	{
		double t;
		t = sqrt(LIGHT_X * LIGHT_X + LIGHT_Y * LIGHT_Y + LIGHT_Z * LIGHT_Z);
		light_vec.x = LIGHT_X / t;
		light_vec.y = LIGHT_Y / t;
		light_vec.z = LIGHT_Z / t;
	}
	/* colors */
	{
		double t = -light_vec.z;
		floorcolor1_dark.r = FLOORCOLOR1_R * AMBIENT;
		floorcolor1_dark.g = FLOORCOLOR1_G * AMBIENT;
		floorcolor1_dark.b = FLOORCOLOR1_B * AMBIENT;
		floorcolor1_bright.r = FLOORCOLOR1_R * t + floorcolor1_dark.r;
		floorcolor1_bright.g = FLOORCOLOR1_G * t + floorcolor1_dark.g;
		floorcolor1_bright.b = FLOORCOLOR1_B * t + floorcolor1_dark.b;
		floorcolor2_dark.r = FLOORCOLOR2_R * AMBIENT;
		floorcolor2_dark.g = FLOORCOLOR2_G * AMBIENT;
		floorcolor2_dark.b = FLOORCOLOR2_B * AMBIENT;
		floorcolor2_bright.r = FLOORCOLOR2_R * t + floorcolor2_dark.r;
		floorcolor2_bright.g = FLOORCOLOR2_G * t + floorcolor2_dark.g;
		floorcolor2_bright.b = FLOORCOLOR2_B * t + floorcolor2_dark.b;
	}
	/* scalar constants */
	coef_c = EYE_X * EYE_X + EYE_Y * EYE_Y + EYE_Z * EYE_Z
		   - BALL_RADIUS * BALL_RADIUS;
	scoef_b_base_2 = FLOOR_Z * light_vec.z;
	scoef_c_base = FLOOR_Z * FLOOR_Z - BALL_RADIUS * BALL_RADIUS;
}

void calc(color *c0, double dx, double dy)
{
	double evx, evy, evz; /* unit eye vector */
	double coef_b_2, coef_D_4;
	{
		double t;
		evx = screen_z_vec.x + screen_x_vec.x * dx + screen_y_vec.x * dy;
		evy = screen_z_vec.y + screen_x_vec.y * dx + screen_y_vec.y * dy;
		evz = screen_z_vec.z + screen_x_vec.z * dx + screen_y_vec.z * dy;
		t = sqrt(evx * evx + evy * evy + evz * evz);
		evx /= t;
		evy /= t;
		evz /= t;
	}
	coef_b_2 = EYE_X * evx + EYE_Y * evy + EYE_Z * evz;
	coef_D_4 = coef_b_2 * coef_b_2 - coef_c;
	if (coef_D_4 >= 0) {
		/* ray cross the ball */
		double npx, npy, npz; /* normal vector on the ball at the crosspoint */
		double ip_nv_light;   /* inner-product of np and light */
		{
			double t;
			t = (-coef_b_2 - sqrt(coef_D_4));
			npx = EYE_X + evx * t;
			npy = EYE_Y + evy * t;
			npz = EYE_Z + evz * t;
		}
		ip_nv_light = npx * light_vec.x + npy * light_vec.y + npz * light_vec.z;
		if (ip_nv_light < 0) {
			double diffuse, specular, inv_np_norm, inv_np_norm_2;
			inv_np_norm_2 = 1 / (npx * npx + npy * npy + npz * npz);
			inv_np_norm = sqrt(inv_np_norm_2);
			ip_nv_light *= inv_np_norm;
			diffuse = -ip_nv_light;
			{
				double mvx, mvy, mvz; /* reflection unit vector */
				double ip_nv_eye;
				ip_nv_eye = -2 * (evx * npx + evy * npy + evz * npz);
				mvx = ip_nv_eye * npx * inv_np_norm_2 + evx;
				mvy = ip_nv_eye * npy * inv_np_norm_2 + evy;
				mvz = ip_nv_eye * npz * inv_np_norm_2 + evz;
				specular = -(mvx * light_vec.x +
							 mvy * light_vec.y +
							 mvz * light_vec.z);
			}
			if (specular > .97) {
				specular *= specular;
				specular *= specular;
				specular *= specular;
				specular *= specular;
				specular *= specular;
				specular *= specular;
				specular *= specular; /* specular = pow(specular, 128); */
				c0->r = BALLCOLOR_R * diffuse + specular
					  + BALLCOLOR_R * AMBIENT;
				c0->g = BALLCOLOR_G * diffuse + specular
					  + BALLCOLOR_G * AMBIENT;
				c0->b = BALLCOLOR_B * diffuse + specular
					  + BALLCOLOR_B * AMBIENT;
			} else {
				c0->r = BALLCOLOR_R * diffuse + BALLCOLOR_R * AMBIENT;
				c0->g = BALLCOLOR_G * diffuse + BALLCOLOR_G * AMBIENT;
				c0->b = BALLCOLOR_B * diffuse + BALLCOLOR_B * AMBIENT;
			}
		} else {
			c0->r = BALLCOLOR_R * AMBIENT;
			c0->g = BALLCOLOR_G * AMBIENT;
			c0->b = BALLCOLOR_B * AMBIENT;
		}
	} else {
		/* ray cross the plane */
		double npx, npy; /* crosspoint */
		double scoef_b_2, scoef_c, scoef_D_4;
		int shade_flag, magic_val;
		{
			double t = (FLOOR_Z - EYE_Z) / evz;
			npx = evx * t + EYE_X;
			npy = evy * t + EYE_Y;
		}
		scoef_b_2 = (npx * light_vec.x + npy * light_vec.y) + scoef_b_base_2;
		scoef_c = npx * npx + npy * npy + scoef_c_base;
		scoef_D_4 = scoef_b_2 * scoef_b_2 - scoef_c;
		shade_flag = (scoef_D_4 >= 0);
		magic_val = (int)(npx * INV_FLOOR_DENSITY + 1000)
				  + (int)(npy * INV_FLOOR_DENSITY + 1000);
		if ((magic_val & 1) != 0) {
			if (!shade_flag) {
				c0->r = floorcolor1_bright.r;
				c0->g = floorcolor1_bright.g;
				c0->b = floorcolor1_bright.b;
			} else {
				c0->r = floorcolor1_dark.r;
				c0->g = floorcolor1_dark.g;
				c0->b = floorcolor1_dark.b;
			}
		} else {
			if (!shade_flag) {
				c0->r = floorcolor2_bright.r;
				c0->g = floorcolor2_bright.g;
				c0->b = floorcolor2_bright.b;
			} else {
				c0->r = floorcolor2_dark.r;
				c0->g = floorcolor2_dark.g;
				c0->b = floorcolor2_dark.b;
			}
		}
	}
}

void render(unsigned char *a, unsigned char *w, int x_size, int y_size,
			int punum, int puid)
{
	unsigned char *p = w;
	double x, y, dx, dy, init_x, init_y;
	int ix, iy, dix, diy, init_ix, init_iy;
	init_ix = puid % x_size;
	init_iy = puid / x_size;
	dix = punum;
	diy = ((punum / x_size > 0) ? (punum / x_size) : 1);
	{
		double dp = 2. / x_size;
		init_x = -1 + init_ix * dp;
		init_y = (double)y_size / x_size + init_iy * dp;
		dx = dix * dp;
		dy = diy * dp;
	}
	for (iy = init_iy, y = init_y; iy < y_size; iy += diy, y -= dy) {
		for (ix = init_ix, x = init_x; ix < x_size; ix += dix, x += dx) {
			color c0;
			calc(&c0, x, y);
			if (c0.r < 0) {
				p[0] = 0;
			} else if (c0.r > 1) {
				p[0] = UCHAR_MAX;
			} else {
				p[0] = (unsigned char)(c0.r * UCHAR_MAX);
			}
			if (c0.g < 0) {
				p[1] = 0;
			} else if (c0.g > 1) {
				p[1] = UCHAR_MAX;
			} else {
				p[1] = (unsigned char)(c0.g * UCHAR_MAX);
			}
			if (c0.b < 0) {
				p[2] = 0;
			} else if (c0.b > 1) {
				p[2] = UCHAR_MAX;
			} else {
				p[2] = (unsigned char)(c0.b * UCHAR_MAX);
			}
			p += 3;
		}
	}
	if (puid == 0) {
		const int blocksize = x_size * y_size / punum * 3;
		int i, j;
		memcpy(a, w, blocksize);
		for (i = punum; i > 1; i--) {
			MPI_Status status;
			int src;
			MPI_Recv(w, blocksize, MPI_CHAR, MPI_ANY_SOURCE, 0, MPI_COMM_WORLD,
					 &status);
			src = status.MPI_SOURCE;
			memcpy(a + src * blocksize, w, blocksize);
		}
	} else {
		const int blocksize = x_size * y_size / punum * 3;
		MPI_Send(w, blocksize, MPI_CHAR, 0, 0, MPI_COMM_WORLD);
	}
}
