#include <mpi.h>
#include <time.h>
#include <stdio.h>
#include <math.h>
#define MAX 1536
/*
 * Solves the dense linear system a*x = b (MAX unknowns) with the conjugate
 * gradient method and reports the elapsed wall-clock time of the iteration.
 *
 * Every MPI rank currently runs the complete, identical computation; only
 * rank 0 prints the solution and the timing.  The matrix-vector product
 * inside the iteration loop is the part marked for parallelization.
 *
 * argc/argv are forwarded to MPI_Init().  Returns 0 after MPI_Finalize().
 */
int main(int argc, char **argv)
{
	/* About 18 MiB: kept in static storage because an automatic array of
	 * this size would typically exceed the default stack limit. */
	static double a[MAX][MAX];
	double b[MAX], x[MAX], p[MAX], y[MAX], r[MAX];
	double alpha, alpha0, alpha1, beta, beta0;
	double startcomp, endcomp;
	int i, j, k, pid, nproc;

	MPI_Init(&argc, &argv);
	MPI_Comm_rank(MPI_COMM_WORLD, &pid);
	MPI_Comm_size(MPI_COMM_WORLD, &nproc);

// Initialize

	/* Right-hand side and initial guess. */
	for (i = 0; i < MAX; i++) b[i] = 50.0 + i;
	for (i = 0; i < MAX; i++) x[i] = i;
	/* Symmetric matrix: 1500 on the diagonal, 1 everywhere else. */
	for (j = 0; j < MAX; j++) {
		for (i = 0; i < MAX; i++) {
			if (i == j) a[i][j] = 1500.0;
			else a[i][j] = 1.0;
		}
	}
	/* y = A * x for the initial guess. */
	for (j = 0; j < MAX; j++) {
		y[j] = 0;
		for (i = 0; i < MAX; i++)
			y[j] += a[i][j] * x[i];
	}
	/* Initial residual r = b - A*x, which is also the first search direction p. */
	for (i = 0; i < MAX; i++) {
		p[i] = b[i] - y[i];
		r[i] = p[i];
	}
	printf("MAX:%d nproc:%d pid:%d\n", MAX, nproc, pid);
// Loop
	startcomp = MPI_Wtime();
	for (k = 0; k < MAX; k++) {
// A x p : target of parallelization
		/* y = A * p.  The element used is a[i][j], which equals a[j][i]
		 * because the matrix is symmetric. */
		for (j = 0; j < MAX; j++) {
			y[j] = 0.0;
			for (i = 0; i < MAX; i++)
				y[j] += a[i][j] * p[i];
		}
// The following part should be executed only in pid=0
		/* alpha0 = p.r, alpha1 = p.(A p) */
		alpha0 = 0.0;
		alpha1 = 0.0;
		for (i = 0; i < MAX; i++) {
			alpha0 += p[i] * r[i];
			alpha1 += p[i] * y[i];
		}
		/* p.(A p) stops being positive once the search direction has
		 * vanished (the iteration has converged); stop here instead of
		 * dividing by zero and filling x and r with NaN. */
		if (!(alpha1 > 0.0))
			break;
		alpha = alpha0 / alpha1;
		/* Step along p and update the residual accordingly. */
		for (i = 0; i < MAX; i++)
			x[i] = x[i] + alpha * p[i];
		for (i = 0; i < MAX; i++)
			r[i] = r[i] - alpha * y[i];
		/* beta = -(r_new.(A p)) / (p.(A p)) keeps the next direction
		 * A-conjugate to the current one. */
		beta0 = 0.0;
		for (i = 0; i < MAX; i++)
			beta0 += r[i] * y[i];
		beta = -beta0 / alpha1;
		for (i = 0; i < MAX; i++)
			p[i] = r[i] + beta * p[i];
	}
	endcomp = MPI_Wtime();
	/* Only rank 0 reports the solution, the final residual and the timing. */
	if (pid == 0) {
		for (i = 0; i < MAX; i++)
			printf("%d: %f %f\n", i, x[i], r[i]);

		printf("Time:%lf thread:%d \n", endcomp - startcomp, nproc);
	}
	MPI_Finalize();
	return 0;
}
