/*
 * <<< torus_3d.cc >>>
 *
 * --- Instruction-level 3D-torus simulator
 *     Copyright (C) 2000-2001 Amano Lab., Keio University. ---
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with this program; if not, write to the Free Software Foundation, Inc.,
 *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
 */

#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>
#include <isis/isis.h>
#include "torus_3d_ecube_router.h"
#include "torus_3d_escape_router.h"
#include "torus_3d_duato_router.h"

using namespace std;

typedef r3000_word word_type;
typedef size_t node_address_type;
typedef r3081_processing_element processing_element_type;
typedef network_packet<node_address_type, word_type> packet_type;
typedef router<packet_type> router_base_type;
typedef mp_network_interface<packet_type, word_type> network_interface_type;
typedef packet_type::timestamp_type timestamp_type;

/*
 * Return a pointer to the last path component of `s`: the character just
 * after the final '/' in the string, or `s` itself when it contains no '/'.
 * The result points into the caller's buffer; nothing is copied.
 * A string ending in '/' yields a pointer to its terminating '\0'.
 */
static const char* to_basename(const char* s)
{
	const char* base = s;
	for (const char* cursor = s; *cursor != '\0'; ++cursor) {
		// every separator seen moves the candidate start past it
		if (*cursor == '/') {
			base = cursor + 1;
		}
	}
	return base;
}

/*
 * Print the command-line help text to standard output.
 * `name` is the program name shown on the leading "usage:" line.
 */
void usage(const char* name)
{
	// One entry per output line that follows the "usage:" header.
	static const char* const help_lines[] = {
		"options:",
		"  -b<n>, --buffer=<n>    set channel buffer size.",
		"  -c,    --clock         print execution time in clock.",
		"  -d<n>, --delay=<n>     set memory access delay.",
		"  -l<n>, --length=<n>    set max data size in flits.",
		"  -p<n>, --pool=<n>      set receive pool buffer size.",
		"  -r<n>, --reqbuf=<n>    set send/recv request buffer size.",
		"  -x<n>, --xsize=<n>     set node number of x-axis.",
		"  -y<n>, --ysize=<n>     set node number of y-axis.",
		"  -z<n>, --zsize=<n>     set node number of z-axis.",
		"         --routing=<str> set routing algorithm:",
		"                           ecube:  ecube routing (default)",
		"                           escape: Duato's escape-path",
		"                           duato:  Duato's protocol",
		"  -h,    --help          print this message.",
		"  -i,    --info          print simulation information.",
		"  -v,    --verbose       verbosely output.",
		"  --version              print version information."
	};
	const size_t line_count = sizeof(help_lines) / sizeof(help_lines[0]);

	cout << "usage: " << name;
	cout << " <target> [options] [-- [target options]]" << endl;
	for (size_t k = 0; k < line_count; k++) {
		cout << help_lines[k] << endl;
	}
}

/*
 * Print "<name> - <version string>" to standard output, where the version
 * string is whatever isis::version_string() returns.
 */
void version(const char* name)
{
	cout << name << " - ";
	cout << isis::version_string() << endl;
}

/*
 * Simulator entry point.
 *
 * Parses the simulator options from argv, builds x_size * y_size * z_size
 * nodes (processing element + network interface + router each), wires the
 * routers into a 3D torus, then clocks every component until all processing
 * elements have halted or a bus error is detected.
 *
 * Returns 0 after --help/--version, 1 on a setup error (no target given,
 * zero-sized network, target cannot be loaded), and otherwise the
 * commandline status reported by processing element 0.
 */
int main(int, char** argv)
{
	const word_type local_memory_address = 0;
	const word_type local_memory_size = 0x20000000;
	const word_type network_interface_address = 0xbfe00000;
	size_t x_size = 4, y_size = 2, z_size = 2, penum;
	enum { ECUBE, ESCAPE, DUATO } routing = ECUBE;
	string routing_name = "ecube routing";
	unsigned int mem_delay = 0;
	size_t buf_size = 64, dataflit_len = 128, pool_size = 16, reqbuf_size = 16;
	bool clock_flag = false, info_flag = false, verbose_flag = false;
	size_t i;

	// read commandline argument for simulator
	argument_parser arg((const char* const*)(argv + 1));
	if (arg.defined('h') || arg.defined("help"))
		{ usage(to_basename(argv[0])); return 0; }
	if (arg.defined("version"))
		{ version(to_basename(argv[0])); return 0; }
	if (arg.defined('c') || arg.defined("clock"))
		clock_flag = true;
	if (arg.defined('i') || arg.defined("info"))
		info_flag = true;
	// verbose output implies the information output as well
	if (arg.defined('v') || arg.defined("verbose"))
		verbose_flag = info_flag = true;
	// for each numeric option the long form, when present, overrides the
	// short form because it is applied second
	if (arg['b'] != NULL)	   buf_size = atoi(arg['b']);
	if (arg["buffer"] != NULL) buf_size = atoi(arg["buffer"]);
	if (arg['d'] != NULL)	   mem_delay = atoi(arg['d']);
	if (arg["delay"] != NULL)  mem_delay = atoi(arg["delay"]);
	if (arg['l'] != NULL)	   dataflit_len = atoi(arg['l']);
	if (arg["length"] != NULL) dataflit_len = atoi(arg["length"]);
	if (arg['p'] != NULL)	   pool_size = atoi(arg['p']);
	if (arg["pool"] != NULL)   pool_size = atoi(arg["pool"]);
	if (arg['r'] != NULL)	   reqbuf_size = atoi(arg['r']);
	if (arg["reqbuf"] != NULL) reqbuf_size = atoi(arg["reqbuf"]);
	if (arg['x'] != NULL)	   x_size = atoi(arg['x']);
	if (arg["xsize"] != NULL)  x_size = atoi(arg["xsize"]);
	if (arg['y'] != NULL)	   y_size = atoi(arg['y']);
	if (arg["ysize"] != NULL)  y_size = atoi(arg["ysize"]);
	if (arg['z'] != NULL)	   z_size = atoi(arg['z']);
	if (arg["zsize"] != NULL)  z_size = atoi(arg["zsize"]);
	// any routing name other than "escape" or "duato" keeps the ecube default
	if (arg["routing"] != NULL)	{
		if (string(arg["routing"]) == "escape") {
			routing = ESCAPE;
			routing_name = "duato's escape path";
		} else if (string(arg["routing"]) == "duato") {
			routing = DUATO;
			routing_name = "duato's routing";
		}
	}
	if (arg.argument()[0] == NULL) {
		cerr << "No executable file specified." << endl;
		return 1;
	}
	// A zero-length axis would give an empty machine, and the code below
	// indexes pe[0] unconditionally, so reject it up front.
	if (x_size == 0 || y_size == 0 || z_size == 0) {
		cerr << "Network size must be positive." << endl;
		return 1;
	}
	penum = x_size * y_size * z_size;

	array<processing_element_type> pe(penum); // ?? cannot use vector
	vector<network_interface_type> ni(penum);
	vector<router_base_type*> rt(penum);	  // owned; deleted before exit

	// setup processing elements
	for (i = 0; i < pe.size(); i++) {
		// --- ids
		pe[i].set_processor_number(penum);
		pe[i].set_processor_id(i);
		// --- memory map
		pe[i].set_local_memory_area(local_memory_address, local_memory_size);
		pe[i].set_local_memory_read_wait(mem_delay);
		pe[i].set_local_memory_write_wait(mem_delay);
		// --- I/O
		pe[i].set_file_table_size(16);
		pe[i].set_standard_input_stream(cin);
		pe[i].set_standard_output_stream(cout);
		pe[i].set_standard_error_stream(cerr);
		// --- load program to memory and set commandline arguments
		if (!pe[i].load(arg.argument()[0])) {
			cerr << arg.argument()[0]
				 << ": No such file or directory." << endl;
			return 1;
		}
		pe[i].set_commandline_argument(arg.argument());
		pe[i].set_sysinfo("network_interface_address",
						  network_interface_address);
	}

	// setup network interfaces
	for (i = 0; i < ni.size(); i++) {
		ni[i].set_node_address(i);
		ni[i].set_max_data_size(dataflit_len);
		ni[i].set_send_request_buffer_size(reqbuf_size);
		ni[i].set_receive_request_buffer_size(reqbuf_size);
		ni[i].set_receive_pool_size(pool_size);
		ni[i].bus_port_ref().connect(pe[i].processor().port_ref());
		ni[i].set_memory_address(network_interface_address);
	}

	// setup routers: one router of the selected type per node
	for (i = 0; i < rt.size(); i++) {
		switch (routing) {
		case ECUBE:
			{
				typedef torus_3d_ecube_router<packet_type> RT;
				RT* p = new RT;
				p->set_size(x_size, y_size, z_size);
				rt[i] = p;
			}
			break;
		case ESCAPE:
			{
				typedef torus_3d_escape_router<packet_type> RT;
				RT* p = new RT;
				p->set_size(x_size, y_size, z_size);
				rt[i] = p;
			}
			break;
		case DUATO:
			{
				typedef torus_3d_duato_router<packet_type> RT;
				RT* p = new RT;
				p->set_size(x_size, y_size, z_size);
				rt[i] = p;
			}
			break;
		}
		rt[i]->set_node_address(i);
		for (size_t j = 0; j < rt[i]->input_size(); j++) {
			rt[i]->set_buffer_size(j, buf_size);
		}
	}
	// connect routers: node i sits at (x, y, z) with
	// i = x + y * x_size + z * x_size * y_size, and each axis wraps around
	for (i = 0; i < rt.size(); i++) {
		const size_t xy_size = x_size * y_size;
		const size_t x = i % x_size, y = (i / x_size) % y_size, z = i / xy_size;
		size_t rg, lf, dw, up, bt, tp;
		// neighbours in +x/-x, +y/-y, +z/-z direction
		rg = ((x != x_size - 1) ? (i + 1) : (i - x));
		lf = ((x != 0) ? (i - 1) : (i + x_size - 1));
		dw = ((y != y_size - 1) ? (i + x_size) : (i - y * x_size));
		up = ((y != 0) ? (i - x_size) : (i + (y_size - 1) * x_size));
		bt = ((z != z_size - 1) ? (i + xy_size) : (i - z * xy_size));
		tp = ((z != 0) ? (i - xy_size) : (i + (z_size - 1) * xy_size));
		// channels 0..5 go to the six neighbours; each output is paired
		// with the neighbour's input for the opposite direction
		rt[i]->output_channel(0).connect(rt[rg]->input_channel(1));
		rt[i]->output_channel(1).connect(rt[lf]->input_channel(0));
		rt[i]->output_channel(2).connect(rt[dw]->input_channel(3));
		rt[i]->output_channel(3).connect(rt[up]->input_channel(2));
		rt[i]->output_channel(4).connect(rt[bt]->input_channel(5));
		rt[i]->output_channel(5).connect(rt[tp]->input_channel(4));
		// channel 6 is the link to this node's own network interface
		rt[i]->input_channel(6).connect(ni[i].output_channel());
		rt[i]->output_channel(6).connect(ni[i].input_channel());
	}

	// last setup: routers and network interfaces
	for (i = 0; i < rt.size(); i++) {
		ni[i].setup();
		rt[i]->setup();
	}

	// show condition information
	if (info_flag) {
		cout << "--- condition ---" << endl
			 << "network topology:       " << "3d-torus" << endl
			 << "network size:           " << x_size << 'x' << y_size << 'x'
										   << z_size
										   << " (" << penum << ')' << endl
			 << "routing algorithm:      " << routing_name << endl
			 << "channel buffer size:    " << buf_size << endl
			 << "NI request buffer size: " << reqbuf_size << endl
			 << "NI pool size:           " << pool_size << endl
			 << "max data size:          " << dataflit_len << endl
			 << "memory access delay:    " << mem_delay << endl
			 << endl;
	}

	// execute simulation
	{
		size_t count = 0;
		while (1) {
			// stop as soon as every processing element has halted
			bool halt_flag = true;
			for (i = 0; i < penum; i++) {
				if (!pe[i].is_halt()) {
					halt_flag = false;
					break;
				}
			}
			if (halt_flag) break;
			// Check for bus errors once every 1024 iterations.
			// The parentheses are required: `==` binds tighter than `&`,
			// so `++count & 0x3ff == 0` is `++count & 0` and never true.
			if ((++count & 0x3ff) == 0) {
				bool buserror_flag = false;
				for (i = 0; i < penum; i++) {
					if (pe[i].is_bus_error()) {
						cout << hex
							 << "bus error(I:0x"
							 << pe[i].processor().program_counter()
							 << ", D:0x" << pe[i].bus_error_address() << ')'
							 << dec << endl;
						buserror_flag = true;
						break;
					}
				}
				if (buserror_flag) break;
			}
			if (verbose_flag) {
				cout << "--- clk:" << pe[0].timer_clock_value()
					 << " ---" << endl;
				for (i = 0; i < rt.size(); i++) {
					cout << "rt" << i << ": " << *rt[i] << endl;
				}
				for (i = 0; i < ni.size(); i++) {
					cout << "ni" << i << ": " << ni[i] << endl;
				}
				cout << endl;
			}
			// two passes per cycle: clock_in on every component first,
			// then clock_out on every component
			for (i = 0; i < penum; i++) {
				pe[i].clock_in();
				ni[i].clock_in();
				rt[i]->clock_in();
			}
			for (i = 0; i < penum; i++) {
				pe[i].clock_out();
				ni[i].clock_out();
				rt[i]->clock_out();
			}
		}
	}

	// show results
	if (info_flag) {
		const size_t total_clock = pe[0].timer_clock_value();
		size_t send_pkt_count = 0, recv_pkt_count = 0, recv_flit_count = 0,
			   total_hops = 0;
		timestamp_type total_latency = 0;
		double ave_len, ave_hops, throughput, latency;
		for (size_t i = 0; i < rt.size(); i++) {
			total_hops += rt[i]->total_hop_count();
		}
		for (size_t i = 0; i < ni.size(); i++) {
			send_pkt_count += ni[i].send_packet_count();
			recv_pkt_count += ni[i].receive_packet_count();
			recv_flit_count += ni[i].receive_flit_count();
			total_latency += ni[i].total_latency();
		}
		// NOTE: these are floating-point divisions; when no packet was
		// received (or no clock elapsed) the divisor is zero and the
		// printed averages are not meaningful numbers.
		ave_len = double(recv_flit_count) / recv_pkt_count;
		ave_hops = double(total_hops) / recv_pkt_count;
		throughput = double(recv_flit_count) / ni.size() / total_clock;
		latency = double(total_latency) / recv_pkt_count;
		cout << "--- results ---" << endl
			 << "total clock: " << setw(8) << total_clock << endl
			 << "sent:        " << setw(8) << send_pkt_count << endl
			 << "received:    " << setw(8) << recv_pkt_count << endl
			 << "ave. length: " << setw(8) << ave_len << endl
			 << "ave. hops:   " << setw(8) << ave_hops << endl
			 << "throughput:  " << setw(8) << throughput << endl
			 << "latency:     " << setw(8) << latency << endl;
	} else if (clock_flag) {
		cout << "clock: " << pe[0].timer_clock_value() << endl;
	}

	// exit: release the routers allocated with new above
	for (i = 0; i < rt.size(); i++) delete rt[i];
	return pe[0].commandline_status();
}
