/*
 * <<< fft.c >>>
 *
 * --- Sample application for isis 'fast fourier transform' - for uniprocessor
 *     Copyright (C) 1995-2000 Amano Lab., Keio University. ---
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
 */

#include <ctype.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

/* Circle constant; used when building the rotation (twiddle) table. */
#define PI 3.14159265358979323846

/* Transform length used when no usable size is given on the command line. */
#define DEFAULT_SIZE	64
/* Largest transform length for which the sample values are printed. */
#define MAX_PRINT_SIZE	256
/* Threshold that check() compares the round-trip error against. */
#define DIFF_LIMIT		1e-7

/* Scalar type of every sample component. */
typedef double data_t;
/* Complex sample: `re` is the real part, `im` the imaginary part. */
typedef struct { data_t re, im; } complex;

/* File-local helpers; all of them are defined before main(). */
static int fix_number_to_exp2n(int);
static int get_bitrev_number(int, int);
static void make_data(complex*, int, const complex*);
static void copy_data(const complex*, complex*, int);
static int check(const complex*, const complex*, int);
static void show(const complex*, int);
static void make_rotate_table(complex*, int);
/*
 * Transform entry points (external linkage), defined after main().
 * Arguments: data array (transformed in place), its length, rotation table.
 */
void fft(complex*, int, const complex*);
void ufft(complex*, int, const complex*);

/*
 * Round x down to a power of two.
 *
 * Returns the largest power of two that is <= x, or 0 when x <= 0.
 *
 * Non-positive input is rejected up front: right-shifting a negative int
 * is implementation-defined and, with an arithmetic shift, never reaches
 * zero, so a shift-until-zero loop would not terminate for such input.
 */
static int fix_number_to_exp2n(int x)
{
	int pow2 = 1;

	if (x <= 0) {
		return 0;
	}
	/* Compare against x / 2 so that doubling pow2 can never overflow. */
	while (pow2 <= x / 2) {
		pow2 *= 2;
	}
	return pow2;
}

/*
 * Reverse the order of the low bit_number bits of x.
 *
 * bit_number: how many low-order bits of x take part in the reversal.
 * x:          value whose bits are reversed; higher bits are ignored.
 *
 * Returns the bit-reversed value; 0 when bit_number <= 0.
 */
static int get_bitrev_number(int bit_number, int x)
{
	int reversed = 0;
	int remaining;

	/* Peel bits off the bottom of x and push them onto the bottom of the result. */
	for (remaining = bit_number; remaining > 0; remaining--) {
		reversed = (reversed << 1) | (x & 1);
		x >>= 1;
	}
	return reversed;
}

/*
 * Build the test input in z[0..n-1] from the quarter-length rotation table.
 *
 * For each table entry i < n/4 the real parts of four samples, one per
 * quarter of z, are set to +re, -im, -re and +im of rotate_tbl[i]; every
 * imaginary part is then cleared.  Assuming rotate_tbl[i] holds
 * (cos, sin) of 2*PI*i/n, this amounts to z[i] = (cos(2*PI*i/n), 0)
 * without calling cos() again.
 *
 * z:          output array of n samples.
 * n:          number of samples.
 * rotate_tbl: table with at least n/4 entries.
 */
static void make_data(complex *z, int n, const complex *rotate_tbl)
{
	const int quarter = n / 4;
	const int half = n / 2;
	const int three_quarters = quarter + half;
	int k;

	k = 0;
	while (k < quarter) {
		z[k].re = rotate_tbl[k].re;
		z[quarter + k].re = -rotate_tbl[k].im;
		z[half + k].re = -rotate_tbl[k].re;
		z[three_quarters + k].re = rotate_tbl[k].im;
		k++;
	}

	/* The generated signal is purely real. */
	k = 0;
	while (k < n) {
		z[k].im = 0;
		k++;
	}
}

/*
 * Copy n complex samples from src to dst, in increasing index order.
 *
 * src: source array (read only).
 * dst: destination array with room for n samples.
 * n:   number of samples to copy; nothing is copied when n <= 0.
 */
static void copy_data(const complex *src, complex *dst, int n)
{
	const complex *in = src;
	complex *out = dst;
	int remaining;

	for (remaining = n; remaining > 0; remaining--) {
		out->re = in->re;
		out->im = in->im;
		in++;
		out++;
	}
}

/*
 * Compare two sample arrays of length n.
 *
 * Computes the Euclidean distance between z1 and z2, i.e. the square root
 * of the sum over all samples of the squared real and imaginary
 * differences.
 *
 * Returns non-zero when that distance is below DIFF_LIMIT, 0 otherwise.
 */
static int check(const complex *z1, const complex *z2, int n)
{
	data_t diff_sigma = 0;
	int i;
	for (i = 0; i < n; i++) {
		const data_t dx = z1[i].re - z2[i].re;
		const data_t dy = z1[i].im - z2[i].im;
		/*
		 * Accumulate over all samples; a plain assignment here would
		 * leave only the last sample's error in diff_sigma.
		 */
		diff_sigma += dx * dx + dy * dy;
	}
	return sqrt(diff_sigma) < DIFF_LIMIT;
}

/*
 * Print n samples to stdout, one per line, as "index: (re, im)".
 *
 * z: samples to print.
 * n: number of samples.
 */
static void show(const complex *z, int n)
{
	const complex *sample = z;
	int idx;

	for (idx = 0; idx < n; idx++, sample++) {
		printf("%03d: (% 5.3f, % 5.3f)\n", idx, sample->re, sample->im);
	}
}

/*
 * Fill tbl[0 .. n/4 - 1] with the rotation factors of the first quadrant.
 *
 * Entry i is meant to hold (cos(2*PI*i/n), sin(2*PI*i/n)), as spelled out
 * by the reference loop kept in the comment inside the function.  Rather
 * than calling cos()/sin() per entry, the values are generated with an
 * incremental recurrence, and only indices below n/8 are iterated: every
 * step also stores the mirrored entry n/4 - i with re and im swapped.
 *
 * tbl: output table; indices 0 .. n/4 - 1 are written (index 0 always).
 * n:   transform length; main() passes a power of two >= 4.
 */
static void make_rotate_table(complex *tbl, int n)
{
	int i;
	/* Straightforward reference version, not compiled: */
	/*
	for (i = 0; i < n / 4; i++) {
		data_t t = 2 * PI * i / n;
		tbl[i].re = cos(t);
		tbl[i].im = sin(t);
	}
	*/
	data_t c, s, dc, ds, t;
	t = sin(PI / n);
	/* dc = 2*sin^2(PI/n) = 1 - cos(2*PI/n): first decrement of the cosine. */
	dc = 2 * t * t;
	/* dc*(2-dc) = 1 - cos^2(2*PI/n), so ds = sin(2*PI/n): first increment of the sine. */
	ds = sqrt(dc * (2 - dc));
	/* t is reused from here on as the constant factor 2*(1 - cos(2*PI/n)). */
	t = 2 * dc;
	/* Entry 0 is the zero angle: (1, 0). */
	c = tbl[0].re = 1;
	s = tbl[0].im = 0;
	for (i = 1; i < n / 8; i++) {
		/* Advance (c, s) by one table step ... */
		c -= dc;
		s += ds;
		/* ... then update the step sizes for the next iteration. */
		dc += t * c;
		ds -= t * s;
		/* Store entry i and its mirror n/4 - i, which has re and im exchanged. */
		tbl[i].re = tbl[n / 4 - i].im = c;
		tbl[i].im = tbl[n / 4 - i].re = s;
	}
	/*
	 * The middle entry n/8 has equal components, sqrt(0.5).  The guard skips
	 * it when n < 8, where n / 8 is 0 and the store would overwrite tbl[0].
	 */
	if (n / 8 != 0) {
		tbl[n / 8].re = tbl[n / 8].im = sqrt(0.5);
	}
}

/*
 * Program entry point.
 *
 * Command line: [-t] [-v] [size]
 *   -t    round-trip test: run fft() followed by ufft() and compare the
 *         result with the input; exit status 0 on success, 1 on failure.
 *   -v    verbose: print the size and, for sizes up to MAX_PRINT_SIZE,
 *         the sample values.
 *   size  requested transform length (default DEFAULT_SIZE); it is rounded
 *         down to a power of two and raised to at least 4.
 *
 * Unknown options and arguments that do not start with a digit are
 * ignored.  Returns 1 when memory cannot be allocated.
 */
int main(int argc, char **argv)
{
	complex *z1 = NULL;
	complex *z2 = NULL;
	complex *z3 = NULL;
	complex *rotate_tbl = NULL;
	int size = DEFAULT_SIZE, check_flag = 0, verbose_flag = 0;
	int status = 0;
	int argi;

	/* Walk the arguments by count so that an empty argv is handled too. */
	for (argi = 1; argi < argc; argi++) {
		const char *arg = argv[argi];
		if (arg[0] == '-') {
			switch (arg[1]) {
			case 't':
				check_flag = 1;
				break;
			case 'v':
				verbose_flag = 1;
				break;
			default:
				break;
			}
		} else if (isdigit((unsigned char)arg[0])) {
			/*
			 * <ctype.h> functions require an unsigned char value, and
			 * strtol (unlike atoi) has defined behaviour on overflow.
			 * A value that does not fit in an int falls back to the default.
			 */
			const long requested = strtol(arg, NULL, 10);
			size = (requested > INT_MAX) ? DEFAULT_SIZE : (int)requested;
		}
	}
	size = fix_number_to_exp2n(size);
	if (size < 4) size = 4;

	/* calloc checks the count * size product for overflow. */
	z1 = calloc((size_t)size, sizeof *z1);
	rotate_tbl = calloc((size_t)size / 4, sizeof *rotate_tbl);
	if (check_flag) {
		z2 = calloc((size_t)size, sizeof *z2);
		z3 = calloc((size_t)size, sizeof *z3);
	}
	if (z1 == NULL || rotate_tbl == NULL
		|| (check_flag && (z2 == NULL || z3 == NULL))) {
		fputs("Out of memory.\n", stderr);
		status = 1;
		goto cleanup;
	}

	make_rotate_table(rotate_tbl, size);
	make_data(z1, size, rotate_tbl);
	if (verbose_flag) {
		printf("size:%d\n", size);
	}
	if (!check_flag) {
		fft(z1, size, rotate_tbl);
		if (verbose_flag && size <= MAX_PRINT_SIZE) show(z1, size);
	} else {
		int result;
		/* z1 keeps the source, z2 the transform, z3 the round trip. */
		copy_data(z1, z2, size);
		fft(z2, size, rotate_tbl);
		copy_data(z2, z3, size);
		ufft(z3, size, rotate_tbl);
		result = check(z1, z3, size);
		if (verbose_flag) {
			if (size <= MAX_PRINT_SIZE) {
				int i;
				printf("idx: "
					   "source:          "
					   "fft:             "
					   "ufft:            "
					   "diff:\n");
				for (i = 0; i < size; i++) {
					data_t dx, dy, d;
					dx = z1[i].re - z3[i].re;
					dy = z1[i].im - z3[i].im;
					d = sqrt(dx * dx + dy * dy);
					printf("%03d: ", i);
					printf("(% 5.3f, % 5.3f) ", z1[i].re, z1[i].im);
					printf("(% 5.3f, % 5.3f) ", z2[i].re, z2[i].im);
					printf("(% 5.3f, % 5.3f) ", z3[i].re, z3[i].im);
					printf("%9.3e\n", d);
				}
			}
			puts(result ? "success." : "failed.");
		}
		status = result ? 0 : 1;
	}

cleanup:
	/* free(NULL) is a no-op, so buffers that were never allocated are fine. */
	free(z3);
	free(z2);
	free(rotate_tbl);
	free(z1);
	return status;
}

/*
 * In-place transform of z[0..n-1]; every output is divided by n.
 *
 * The data is processed in passes whose butterfly span halves from n down
 * to 2.  Each butterfly stores the sum of its two inputs in the upper
 * element and the difference, multiplied by the complex conjugate of a
 * rotation factor, in the lower element.  The result is then put into
 * natural order by a bit-reversal permutation and scaled by 1/n.
 *
 * z:          samples, overwritten with the result.
 * n:          number of samples; the shift arithmetic below requires a
 *             power of two >= 4.
 * rotate_tbl: n/4 first-quadrant rotation factors; factors of the other
 *             quadrants are derived by swapping/negating re and im.
 */
void fft(complex *z, int n, const complex *rotate_tbl)
{
	int log2_n = 0;    /* (1 << log2_n) == n */
	int quarter_shift; /* (1 << quarter_shift) == n / 4 */
	int quarter_mask;  /* (x & quarter_mask) == x % (n / 4) */
	int stage;         /* (1 << stage) == n / span */
	int idx;

	for (idx = n; idx > 1; idx >>= 1) {
		log2_n++;
	}
	quarter_shift = log2_n - 2;
	quarter_mask = (1 << quarter_shift) - 1;

	for (stage = 0; stage < log2_n; stage++) {
		const int span = 1 << (log2_n - stage);
		const int half = span >> 1;
		int j;

		for (j = 0; j < half; j++) {
			/* Position of this butterfly's rotation factor on the full circle. */
			const int angle = j << stage;
			const int quadrant = angle >> quarter_shift;
			const int offset = angle & quarter_mask;
			const data_t tbl_re = rotate_tbl[offset].re;
			const data_t tbl_im = rotate_tbl[offset].im;
			data_t rx, ry;
			int k1;

			/* Rotate the first-quadrant entry by quadrant * 90 degrees. */
			if (quadrant == 0) {
				rx = tbl_re;
				ry = tbl_im;
			} else if (quadrant == 1) {
				rx = -tbl_im;
				ry = tbl_re;
			} else if (quadrant == 2) {
				rx = -tbl_re;
				ry = -tbl_im;
			} else { /* quadrant 3 */
				rx = tbl_im;
				ry = -tbl_re;
			}

			for (k1 = j; k1 < n; k1 += span) {
				const int k2 = k1 + half;
				const data_t t1x = z[k1].re;
				const data_t t1y = z[k1].im;
				const data_t t2x = z[k2].re;
				const data_t t2y = z[k2].im;
				data_t dx, dy;
#ifdef DEBUG
				printf("i:%02d - r[%02d]:(%+4.2f,%+4.2f)"
					   " z[%02d]:(%+4.2f,%+4.2f), z[%02d]:(%+4.2f,%+4.2f)\n",
					   span, (j << stage), rx, ry,
					   k1, t1x, t1y, k2, t2x, t2y);
#endif
				/* Upper output: sum of the pair. */
				z[k1].re = t1x + t2x;
				z[k1].im = t1y + t2y;
				/* Lower output: difference times conj(rx + i*ry). */
				dx = t1x - t2x;
				dy = t1y - t2y;
				z[k2].re = dy * ry + dx * rx;
				z[k2].im = dy * rx - dx * ry;
			}
		}
	}

	/* Undo the bit-reversed output order; each pair is swapped once. */
	for (idx = 0; idx < n; idx++) {
		const int rev = get_bitrev_number(log2_n, idx);
		if (idx < rev) {
			const complex tmp = z[idx];
			z[idx] = z[rev];
			z[rev] = tmp;
		}
	}

	/* Scale the result by 1/n. */
	for (idx = 0; idx < n; idx++) {
		z[idx].re /= n;
		z[idx].im /= n;
	}
}

/*
 * In-place transform of z[0..n-1] in the opposite direction to fft();
 * no scaling is applied.
 *
 * The pass structure is the same as in fft(): the butterfly span halves
 * from n down to 2, the upper element of each butterfly receives the sum
 * and the lower element the difference multiplied by a rotation factor.
 * Here the factor itself is used rather than its complex conjugate.  A
 * bit-reversal permutation restores natural order at the end.
 *
 * z:          samples, overwritten with the result.
 * n:          number of samples; the shift arithmetic below requires a
 *             power of two >= 4.
 * rotate_tbl: n/4 first-quadrant rotation factors; factors of the other
 *             quadrants are derived by swapping/negating re and im.
 */
void ufft(complex *z, int n, const complex *rotate_tbl)
{
	int log2_n = 0;    /* (1 << log2_n) == n */
	int quarter_shift; /* (1 << quarter_shift) == n / 4 */
	int quarter_mask;  /* (x & quarter_mask) == x % (n / 4) */
	int stage;         /* (1 << stage) == n / span */
	int idx;

	for (idx = n; idx > 1; idx >>= 1) {
		log2_n++;
	}
	quarter_shift = log2_n - 2;
	quarter_mask = (1 << quarter_shift) - 1;

	for (stage = 0; stage < log2_n; stage++) {
		const int span = 1 << (log2_n - stage);
		const int half = span >> 1;
		int j;

		for (j = 0; j < half; j++) {
			/* Position of this butterfly's rotation factor on the full circle. */
			const int angle = j << stage;
			const int quadrant = angle >> quarter_shift;
			const int offset = angle & quarter_mask;
			const data_t tbl_re = rotate_tbl[offset].re;
			const data_t tbl_im = rotate_tbl[offset].im;
			data_t rx, ry;
			int k1;

			/* Rotate the first-quadrant entry by quadrant * 90 degrees. */
			if (quadrant == 0) {
				rx = tbl_re;
				ry = tbl_im;
			} else if (quadrant == 1) {
				rx = -tbl_im;
				ry = tbl_re;
			} else if (quadrant == 2) {
				rx = -tbl_re;
				ry = -tbl_im;
			} else { /* quadrant 3 */
				rx = tbl_im;
				ry = -tbl_re;
			}

			for (k1 = j; k1 < n; k1 += span) {
				const int k2 = k1 + half;
				const data_t t1x = z[k1].re;
				const data_t t1y = z[k1].im;
				const data_t t2x = z[k2].re;
				const data_t t2y = z[k2].im;
				data_t dx, dy;

				/* Upper output: sum of the pair. */
				z[k1].re = t1x + t2x;
				z[k1].im = t1y + t2y;
				/* Lower output: difference times (rx + i*ry). */
				dx = t1x - t2x;
				dy = t1y - t2y;
				z[k2].re = dx * rx - dy * ry;
				z[k2].im = dx * ry + dy * rx;
			}
		}
	}

	/* Undo the bit-reversed output order; each pair is swapped once. */
	for (idx = 0; idx < n; idx++) {
		const int rev = get_bitrev_number(log2_n, idx);
		if (idx < rev) {
			const complex tmp = z[idx];
			z[idx] = z[rev];
			z[rev] = tmp;
		}
	}
}
