#include "hex.h"
/**
 * For the element t owned by the calling thread, computes
 *   d_C[t] = sum over i in [0, LENGTH) of (d_A[t] - d_B[i])^2.
 *
 * Each thread produces one element of d_C; t is the thread's flat 1D index
 * (blockDim.x * blockIdx.x + threadIdx.x), so only the x dimension of the
 * launch configuration is used.
 *
 * @param d_A  input array, read at the thread's own index
 * @param d_B  input array; elements [0, LENGTH) are read by every thread
 * @param d_C  output array, written at the thread's own index
 *
 * NOTE(review): the bounds guard assumes d_A and d_C hold LENGTH elements,
 * mirroring ex1Host where a single length covers all three arrays -- confirm
 * against the launch site.
 */
__global__ void ex1Kernel(float *d_A, float *d_B, float *d_C) {
  // Step 1. Compute this thread's own CUDA thread ID.
  const int thread_id = blockDim.x * blockIdx.x + threadIdx.x;

  // Threads past the end of the data (grid size rounded up to a whole number
  // of blocks) must not touch memory.
  if (thread_id >= LENGTH) {
    return;
  }

  // Step 2. Read this thread's A value once and accumulate in a local
  // variable instead of doing a read-modify-write on d_C every iteration.
  const float a = d_A[thread_id];
  float sum = 0.0f;
  for (int i = 0; i < LENGTH; i++) {
    const float diff = a - d_B[i];
    sum += diff * diff;
  }

  // Step 3. Store the finished sum with a single write.
  d_C[thread_id] = sum;
}

/**
 * Host-side version of the computation performed by ex1Kernel: for every
 * index i in [0, length), stores in h_C[i] the sum over j in [0, length) of
 * (h_A[i] - h_B[j])^2.
 *
 * @param h_A     input array, read at indices [0, length)
 * @param h_B     input array, read at indices [0, length)
 * @param h_C     output array, written at indices [0, length)
 * @param length  number of elements processed in each array; nothing is
 *                written when it is zero or negative
 */
__host__ void ex1Host(float *h_A, float *h_B, float *h_C, int length) {
  int out = 0;
  while (out < length) {
    // Start each output element from zero, then add one squared difference
    // per element of h_B.
    h_C[out] = 0.0f;
    for (int k = 0; k < length; ++k) {
      const float diff = h_A[out] - h_B[k];
      h_C[out] += diff * diff;
    }
    ++out;
  }
}
