/**************************************/
/* You don't have to change this code */
/* Please modify the "gpu_calc.cu"    */
/**************************************/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <getopt.h>
#include <malloc.h>
#include <time.h>
#include <omp.h>
#include "calculation.h"
#define ERROR 1.0e-6

/*
 * Exchange the lower and upper halves of the real and imaginary arrays.
 * Element i trades places with element num/2 + i for every i in [0, num/2);
 * when num is odd the final element is therefore left where it is.
 *
 *   re, im : arrays modified in place
 *   num    : number of elements considered in each array
 */
void dft_swap(double *re, double *im, int num)
{
  const int half = num / 2;
  int k = 0;

  while (k < half) {
    swap(re + k, re + half + k);
    swap(im + k, im + half + k);
    ++k;
  }
}

/* Exchange the two doubles that a and b point to. */
void swap(double *a, double *b)
{
  const double held = *b;

  *b = *a;
  *a = held;
}

/*
 * Print one row of the option table to standard output:
 * s1 left-justified in 10 columns, s2 left-justified in 20 columns,
 * then s3 after a colon.
 */
static __inline void usage_line(const char *s1, const char *s2, const char *s3)
{
  fprintf(stdout, " %-10s, %-20s: %s\n", s1, s2, s3);
}

/* Print the command-line help text (banner plus one row per option). */
static void usage()
{
  /* Each row: short form, long form, description. */
  static const char *const rows[][3] = {
    {"-n [1-8]", "--particles=[1-8]",
     "number of particles (N=[1-8]x128)    [default: 1]"},
    {"-s [1-3]", "--select=[1-3]",
     "select problem data set"},
    {"-h", "--help",
     "show this Usage"},
  };
  const int row_count = (int)(sizeof(rows) / sizeof(rows[0]));
  int r;

  printf("\nUsage: main [OPTION] ...\n\n");
  printf("Options:\n");
  for (r = 0; r < row_count; r++) {
    usage_line(rows[r][0], rows[r][1], rows[r][2]);
  }
  printf("\n");
}

/*
 * Parse the command line and choose the problem size and input data file.
 *
 * Options:
 *   -n K, --particles=K  K in [1,8] sets the element count to K*128.
 *                        K == 0 (or text that does not start with a number)
 *                        picks a pseudo-random count in [0,1023].
 *                        Any other value leaves the default of 128.
 *   -s D, --select=D     D in [1,3] selects data_D.txt.  Any other value is
 *                        reported on stderr and the current selection
 *                        (default: 1) is kept, so filename is always written.
 *   -h, -?, unknown      print the usage text and exit(0).
 *
 * Remaining non-option arguments are echoed to stdout and otherwise ignored.
 *
 *   num       receives the number of elements
 *   filename  receives the data file name; the caller's buffer must hold at
 *             least 11 bytes ("data_N.txt" plus the terminator)
 *   argc,argv the program arguments as received by main
 */
void initialize(int &num, char *filename, int argc, char **argv)
{
  static const char *const data_files[] = {
    "data_1.txt", "data_2.txt", "data_3.txt"
  };
  int opt;
  int N = 128 * 1;
  int data_num = 1;

  /* Seed the generator used by the "-n 0" random-size mode. */
  srand((unsigned)time(NULL));

  while(1) {
    int option_index = 0;
    static struct option long_options[] = {
      /* Name advertised by usage(). */
      {"particles", required_argument, NULL, 'n'},
      /* Previous spelling of the same option, kept so old invocations still parse. */
      {"the number of elements", required_argument, NULL, 'n'},
      {"select", required_argument, NULL, 's'},
      {"help", no_argument, NULL, 'h'},
      {0, 0, 0, 0}
    };

    opt = getopt_long(argc, argv, "n:s:h?", long_options, &option_index);
    if(opt == -1){
      break;
    }
    if(opt == 0){
      continue;
    }

    switch(opt){
    case 'n': {
      /* Parse once; strtol has defined behaviour for out-of-range text. */
      const long requested = strtol(optarg, NULL, 10);

      if(1 <= requested && requested <= 8){
        N = (int)requested * 128;
      }else if(requested == 0){
        N = rand() % 1024;
      }
      /* Any other value: keep the current N. */
      break;
    }
    case 's': {
      const long requested = strtol(optarg, NULL, 10);

      if(1 <= requested && requested <= 3){
        data_num = (int)requested;
      }else{
        /* Without this guard filename would be left unwritten below. */
        fprintf(stderr,
                "ignoring invalid data set selection '%s' (expected 1-3)\n",
                optarg);
      }
      break;
    }
    case 'h':
    case '?':
    default:
      usage();
      exit(0);
      break;
    }
  }

  if (optind < argc) {
    printf("non-option ARGV-elements: ");
    while (optind < argc) {
      printf("%s ", argv[optind++]);
    }
    printf("\n");
  }

  num = N;

  /* data_num is guaranteed to be 1..3 here, so the index is in range. */
  strcpy(filename, data_files[data_num - 1]);
  return;
}

int main(int argc, char **argv)
{
  double *op_re, *op_im;
  double *cpu_re, *cpu_im;
  double *temp_re, *temp_im;
  double *op_norm, *cpu_norm;
  double init_time, start_cpu, end_cpu, start_op, end_op;
  double cpu_ave_re, cpu_ave_im, op_ave_re, op_ave_im;
  int num;
  int i, j;
  FILE *fp;
  int tnum;
  char filename[100];

  initialize(num, filename, argc, argv);

  if((fp = fopen(filename, "r")) == NULL){
	fprintf(stderr, "file open failur\n");
  }

  cpu_re = (double *)malloc(sizeof(double) * num);
  cpu_im = (double *)malloc(sizeof(double) * num);
  cpu_norm = (double *)malloc(sizeof(double) * num);
  op_re = (double *)malloc(sizeof(double) * num);
  op_im = (double *)malloc(sizeof(double) * num);
  op_norm = (double *)malloc(sizeof(double) * num);

  for(i=0; i<num; i++){
  	fscanf(fp, "%lf", &op_re[i]);
  	cpu_re[i] = op_re[i];
    cpu_im[i] = 0;
    op_im[i] = 0;
  }

  printf("\nthe number of elements : %d\n", num);
  printf("the name of dataset    : %s\n\n", filename);

  init_time = omp_get_wtime();

  dft_swap(cpu_re, cpu_im, num);


  if((temp_re = (double*)malloc(sizeof(double)*num)) == NULL){
    fprintf(stderr, "Allocationerror!\n");
    return 1;
  }

  if((temp_im = (double*)malloc(sizeof(double)*num)) == NULL){
    fprintf(stderr, "Allocationerror!\n");
    free(temp_re);
    return 1;
  }


// 1 cpu dummy

  for(i=0; i<num; i++){
    temp_re[i] = temp_im[i] = 0.0;
  }

  for(i=0; i<num; i++){
    for(j=0; j<num; j++){
      temp_re[i] += cpu_re[j]*cos(2*PI*i*j/num) + cpu_im[j]*sin(2*PI*i*j/num);
      temp_im[i] += -cpu_re[j]*sin(2*PI*i*j/num) + cpu_im[j]*cos(2*PI*i*j/num);
    }
  }

// 1 cpu real

  start_cpu = omp_get_wtime();
  for(i=0; i<num; i++){
    temp_re[i] = temp_im[i] = 0.0;
  }

  for(i=0; i<num; i++){
    for(j=0; j<num; j++){
      temp_re[i] += cpu_re[j]*cos(2*PI*i*j/num) + cpu_im[j]*sin(2*PI*i*j/num);
      temp_im[i] += -cpu_re[j]*sin(2*PI*i*j/num) + cpu_im[j]*cos(2*PI*i*j/num);
    }
  }

  for(i=0; i<num; i++){
    cpu_re[i] = temp_re[i];
    cpu_im[i] = temp_im[i];
	cpu_norm[i] = sqrt(temp_re[i]*temp_re[i] + temp_im[i]*temp_im[i]);
  }

  end_cpu = omp_get_wtime();
  dft_swap(cpu_re, cpu_im, num);

// open mp function
  dft_swap(op_re, op_im, num);

  start_op = omp_get_wtime();

//  tnum = omp_get_num_threads();
  for(i=0; i<num; i++){
    temp_re[i] = temp_im[i] = 0.0;
  }

  for(i=0; i<num; i++){
    for(j=0; j<num; j++){
      temp_re[i] += op_re[j]*cos(2*PI*i*j/num) + op_im[j]*sin(2*PI*i*j/num);
      temp_im[i] += -op_re[j]*sin(2*PI*i*j/num) + op_im[j]*cos(2*PI*i*j/num);
    }
  }

  for(i=0; i<num; i++){
    op_re[i] = temp_re[i];
    op_im[i] = temp_im[i];
	op_norm[i] = sqrt(temp_re[i]*temp_re[i] + temp_im[i]*temp_im[i]);
  }
  end_op = omp_get_wtime();

  dft_swap(op_re, op_im, num);

  for(i=0; i<num; i++) {
  	if(fabs(op_norm[i] - cpu_norm[i]) > ERROR) {
		printf("Comparison Error\n"); break; }
	}
	if(i==num) printf("Comparison OK\n");

  printf("Time for 1 CPU: %.6f[msec]   OpenMP: %.6f[msec]\n", 
  				end_cpu-start_cpu, end_op-start_op);

  fclose(fp);
  free(op_re);
  free(op_im);
  free(cpu_re);
  free(cpu_im);
  return 0;

}

