/* sim-outorder.c - sample out-of-order issue perf simulator implementation */

/* SimpleScalar(TM) Tool Suite
 * Copyright (C) 1994-2003 by Todd M. Austin, Ph.D. and SimpleScalar, LLC.
 * All Rights Reserved. 
 * 
 * THIS IS A LEGAL DOCUMENT, BY USING SIMPLESCALAR,
 * YOU ARE AGREEING TO THESE TERMS AND CONDITIONS.
 * 
 * No portion of this work may be used by any commercial entity, or for any
 * commercial purpose, without the prior, written permission of SimpleScalar,
 * LLC (info@simplescalar.com). Nonprofit and noncommercial use is permitted
 * as described below.
 * 
 * 1. SimpleScalar is provided AS IS, with no warranty of any kind, express
 * or implied. The user of the program accepts full responsibility for the
 * application of the program and the use of any results.
 * 
 * 2. Nonprofit and noncommercial use is encouraged. SimpleScalar may be
 * downloaded, compiled, executed, copied, and modified solely for nonprofit,
 * educational, noncommercial research, and noncommercial scholarship
 * purposes provided that this notice in its entirety accompanies all copies.
 * Copies of the modified software can be delivered to persons who use it
 * solely for nonprofit, educational, noncommercial research, and
 * noncommercial scholarship purposes provided that this notice in its
 * entirety accompanies all copies.
 * 
 * 3. ALL COMMERCIAL USE, AND ALL USE BY FOR PROFIT ENTITIES, IS EXPRESSLY
 * PROHIBITED WITHOUT A LICENSE FROM SIMPLESCALAR, LLC (info@simplescalar.com).
 * 
 * 4. No nonprofit user may place any restrictions on the use of this software,
 * including as modified by the user, by any other authorized user.
 * 
 * 5. Noncommercial and nonprofit users may distribute copies of SimpleScalar
 * in compiled or executable form as set forth in Section 2, provided that
 * either: (A) it is accompanied by the corresponding machine-readable source
 * code, or (B) it is accompanied by a written offer, with no time limit, to
 * give anyone a machine-readable copy of the corresponding source code in
 * return for reimbursement of the cost of distribution. This written offer
 * must permit verbatim duplication by anyone, or (C) it is distributed by
 * someone who received only the executable form, and is accompanied by a
 * copy of the written offer of source code.
 * 
 * 6. SimpleScalar was developed by Todd M. Austin, Ph.D. The tool suite is
 * currently maintained by SimpleScalar LLC (info@simplescalar.com). US Mail:
 * 2395 Timbercrest Court, Ann Arbor, MI 48105.
 * 
 * Copyright (C) 1994-2003 by Todd M. Austin, Ph.D. and SimpleScalar, LLC.
 */

#include "isim_system.h"

/* Default constructor.  The body is intentionally empty; the module wiring
 * done by this class lives in initialize(). */
isim_system::isim_system()
{
}


/*
 * Connect the modules that make up the simulated system.
 *
 *   punum   - number of processing units (PUs) to create
 *   portnum - number of external communication ports per PU
 *
 * The shared bus gets portnum*punum PU-side ports; PU number k is wired to
 * bus ports [k*portnum, k*portnum + portnum).
 */
void
isim_system::initialize(unsigned int punum, int portnum)
{
  /* prepare the requested number of PUs */
  soo.resize(punum);

  /* set the memory's top address and size, then attach the memory to the
     shared memory unit */
  shared_mem_isis.set_top(0x80000000);
  shared_mem_isis.resize(0x7fffffff);
  shmem.connect_mem(shared_mem_isis);

  /* tell the shared bus how many ports each PU has, and create its
     PU-side ports */
  shbus.set_each_pu_port_num(portnum);
  shbus.set_num_of_pu_port(portnum*punum);

  /* connect the shared memory's port to the memory-side port of the bus */
  shmem.ref_shbus_port().connect(shbus.ref_mm_port());

  unsigned int pu_idx = 0;
  while (pu_idx < punum)
    {
      soo[pu_idx].set_puid(pu_idx);   /* assign the processor ID */
      soo[pu_idx].set_punum(punum);   /* tell the PU how many PUs exist */
      soo[pu_idx].set_port(portnum);  /* create the PU's external ports */

      /* index of the first shared-bus port that belongs to this PU */
      const unsigned int first_bus_port = pu_idx * portnum;

      /* connect each PU-side bus port to the matching external port of
         the PU, in both directions */
      for (int port_idx = 0; port_idx < portnum; port_idx++)
        {
          shbus.ref_to_pu_port(first_bus_port + port_idx)
            .connect(soo[pu_idx].ref_to_pu_port(port_idx));
          shbus.ref_from_pu_port(first_bus_port + port_idx)
            .connect(soo[pu_idx].ref_from_pu_port(port_idx));
        }

      ++pu_idx;
    }
}

/*
 * Register the command-line options of the simulator.
 *
 *   odb - options database that receives the registrations
 *
 * The header and the explanatory notes are registered once.  Every other
 * option is registered once per processing unit (PU) in `soo', with the PU
 * index passed as the last argument; print_flag is true only for PU 0.
 * Registration order is kept stable on purpose.
 */
void
isim_system::sim_reg_options(struct opt_odb_t *odb)
{
  op_simple->opt_reg_header(odb,
    "sim-outorder: This simulator implements a very detailed out-of-order issue\n"
    "superscalar processor with a two-level memory system and speculative\n"
    "execution support.  This simulator is a performance simulator, tracking the\n"
    "latency of all pipeline operations.\n"
    );

  /* note: how to specify the number of processors */
  op_simple->opt_reg_note(odb,
    " How to define the number of processors:\n"
    "    -p 1 : one processor\n"
    "    -p 8 : eight processors\n"
    );

  /* note: branch predictor configuration */
  op_simple->opt_reg_note(odb,
    " Branch predictor configuration examples for 2-level predictor:\n"
    "    Configurations:   N, M, W, X\n"
    "      N   # entries in first level (# of shift register(s))\n"
    "      W   width of shift register(s)\n"
    "      M   # entries in 2nd level (# of counters, or other FSM)\n"
    "      X   (yes-1/no-0) xor history and address for 2nd level index\n"
    "    Sample predictors:\n"
    "      GAg     : 1, W, 2^W, 0\n"
    "      GAp     : 1, W, M (M > 2^W), 0\n"
    "      PAg     : N, W, 2^W, 0\n"
    "      PAp     : N, W, M (M == 2^(N+W)), 0\n"
    "      gshare  : 1, W, 2^W, 1\n"
    "   Predictor `comb' combines a bimodal and a 2-level predictor.\n"
    );

  /* note: cache configuration format.  The TLB example uses the option
     name that is actually registered below ("-tlb:dtlb"). */
  op_simple->opt_reg_note(odb,
    " The cache config parameter <config> has the following format:\n"
    "    <name>:<nsets>:<bsize>:<assoc>:<repl>\n"
    "\n"
    "    <name>   - name of the cache being defined\n"
    "    <nsets>  - number of sets in the cache\n"
    "    <bsize>  - block size of the cache\n"
    "    <assoc>  - associativity of the cache\n"
    "    <repl>   - block replacement strategy, 'l'-LRU, 'f'-FIFO, 'r'-random\n"
    "\n"
    "    Examples:   -cache:dl1 dl1:4096:32:1:l\n"
    "                -tlb:dtlb dtlb:128:4096:32:r\n"
    );

  /* note: unifying cache levels */
  op_simple->opt_reg_note(odb,
    " Cache levels can be unified by pointing a level of the instruction cache\n"
    " hierarchy at the data cache hierarchy using the \"dl1\" and \"dl2\" cache\n"
    " configuration arguments.  Most sensible combinations are supported, e.g.,\n"
    "\n"
    "    A unified l2 cache (il2 is pointed at dl2):\n"
    "      -cache:il1 il1:128:64:1:l -cache:il2 dl2\n"
    "      -cache:dl1 dl1:256:32:1:l -cache:dl2 ul2:1024:64:2:l\n"
    "\n"
    "    Or, a fully unified cache hierarchy (il1 pointed at dl1):\n"
    "      -cache:il1 dl1\n"
    "      -cache:dl1 ul1:256:32:1:l -cache:dl2 ul2:1024:64:2:l\n"
    );

  for ( unsigned int i = 0; i < soo.size(); i++ ) {

    /* passed to every registration below; set only for the first PU */
    const bool print_flag = (i == 0);

    /* stop the run once the given number of instructions has executed */
    op_simple->opt_reg_uint(odb, "-max:inst",
                            "maximum number of inst's to execute",
                            &soo[i].max_insts, 0,
                            print_flag, NULL, i);

    /* trace options */
    op_simple->opt_reg_int(odb, "-fastfwd",
                           "number of insts skipped before timing starts",
                           &soo[i].fastfwd_count, 0,
                           print_flag, NULL, i);

    /* size of the IFQ (holds fetched instructions) */
    op_simple->opt_reg_int(odb, "-fetch:ifqsize",
                           "instruction fetch queue size(in insts)",
                           &soo[i].ruu_ifq_size, 4,
                           print_flag, NULL, i);

    /* front-end (fetch) speed relative to the execution core */
    op_simple->opt_reg_int(odb, "-fetch:speed",
                           "speed of front-end of machine relative to execution core",
                           &soo[i].fetch_speed, 1,
                           print_flag, NULL, i);

    /* branch predictor type */
    op_simple->opt_reg_string(odb, "-bpred",
                              "branch predictor type {nottaken|taken|bimod|2lev|comb}",
                              &soo[i].pred_type, "comb",
                              print_flag, NULL, i);

    /* bimodal predictor table size.  The current contents of
       bimod_config/bimod_nelt double as the default value. */
    op_simple->opt_reg_int_list(odb, "-bpred:bimod",
                                "bimodal predictor config (<table size>)",
                                soo[i].bimod_config, soo[i].bimod_nelt,
                                &soo[i].bimod_nelt,
                                soo[i].bimod_config,
                                print_flag, NULL,
                                FALSE, i);

    /* 2-level predictor configuration:
         <l1size>     size of the L1 table (branch history table)
         <l2size>     size of the L2 table (2-bit counters)
         <hist_size>  width of the branch history
         <xor>        whether history and address are XORed for the L2 index */
    op_simple->opt_reg_int_list(odb, "-bpred:2lev",
                                "2-level predictor config "
                                "(<l1size> <l2size> <hist_size> <xor>)",
                                soo[i].twolev_config,
                                soo[i].twolev_nelt, &soo[i].twolev_nelt,
                                soo[i].twolev_config,
                                print_flag, NULL,
                                FALSE, i);

    /* combining predictor: size of the meta table that arbitrates between
       the bimodal and the 2-level predictor */
    op_simple->opt_reg_int_list(odb, "-bpred:comb",
                                "combining predictor config (<meta_table_size>)",
                                soo[i].comb_config, soo[i].comb_nelt,
                                &soo[i].comb_nelt,
                                soo[i].comb_config,
                                print_flag, NULL,
                                FALSE, i);

    /* return address stack size; the current ras_size is the default */
    op_simple->opt_reg_int(odb, "-bpred:ras",
                           "return address stack size (0 for no return stack)",
                           &soo[i].ras_size, soo[i].ras_size,
                           print_flag, NULL, i);

    /* Branch Target Buffer configuration */
    op_simple->opt_reg_int_list(odb, "-bpred:btb",
                                "BTB config (<num_sets> <associativity>)",
                                soo[i].btb_config,
                                soo[i].btb_nelt, &soo[i].btb_nelt,
                                soo[i].btb_config,
                                print_flag, NULL,
                                FALSE, i);

    /* instruction decode width */
    op_simple->opt_reg_int(odb, "-decode:width",
                           "instruction decode B/W (insts/cycle)",
                           &soo[i].ruu_decode_width, 4,
                           print_flag, NULL, i);

    /* instruction issue width */
    op_simple->opt_reg_int(odb, "-issue:width",
                           "instruction issue B/W (insts/cycle)",
                           &soo[i].ruu_issue_width, 4,
                           print_flag, NULL, i);

    /* instruction commit width */
    op_simple->opt_reg_int(odb, "-commit:width",
                           "instruction commit B/W (insts/cycle)",
                           &soo[i].ruu_commit_width, 4,
                           print_flag, NULL, i);

    /* size of the RUU (holds instructions from fetch until commit) */
    op_simple->opt_reg_int(odb, "-ruu:size",
                           "register update unit (RUU) size",
                           &soo[i].RUU_size, 16,
                           print_flag, NULL, i);

    /* size of the LSQ (holds load/store instructions) */
    op_simple->opt_reg_int(odb, "-lsq:size",
                           "load/store queue (LSQ) size",
                           &soo[i].LSQ_size, 8,
                           print_flag, NULL, i);

    /* L1 local data cache configuration */
    op_simple->opt_reg_string(odb, "-cache:dl1",
                              "l1 data cache config, i.e., {<config>|none}",
                              &soo[i].cache_dl1_opt, "dl1:128:32:4:l",
                              print_flag, NULL, i);

    /* L1 local data cache hit latency */
    op_simple->opt_reg_int(odb, "-cache:dl1lat",
                           "l1 data cache hit latency (in cycles)",
                           &soo[i].cache_dl1_lat, 1,
                           print_flag, NULL, i);

    /* L2 local data cache configuration */
    op_simple->opt_reg_string(odb, "-cache:dl2",
                              "l2 data cache config, i.e., {<config>|none}",
                              &soo[i].cache_dl2_opt, "ul2:1024:64:4:l",
                              print_flag, NULL, i);

    /* L2 local data cache hit latency */
    op_simple->opt_reg_int(odb, "-cache:dl2lat",
                           "l2 data cache hit latency (in cycles)",
                           &soo[i].cache_dl2_lat, 6,
                           print_flag, NULL, i);

    /* L1 local instruction cache configuration */
    op_simple->opt_reg_string(odb, "-cache:il1",
                              "l1 inst cache config, i.e., {<config>|dl1|dl2|none}",
                              &soo[i].cache_il1_opt, "il1:512:32:1:l",
                              print_flag, NULL, i);

    /* L1 local instruction cache hit latency */
    op_simple->opt_reg_int(odb, "-cache:il1lat",
                           "l1 instruction cache hit latency (in cycles)",
                           &soo[i].cache_il1_lat, 1,
                           print_flag, NULL, i);

    /* L2 local instruction cache configuration */
    op_simple->opt_reg_string(odb, "-cache:il2",
                              "l2 instruction cache config, i.e., {<config>|dl2|none}",
                              &soo[i].cache_il2_opt, "dl2",
                              print_flag, NULL, i);

    /* L2 local instruction cache hit latency */
    op_simple->opt_reg_int(odb, "-cache:il2lat",
                           "l2 instruction cache hit latency (in cycles)",
                           &soo[i].cache_il2_lat, 6,
                           print_flag, NULL, i);

    /* map 64-bit instruction addresses onto 32-bit ones */
    op_simple->opt_reg_flag(odb, "-cache:icompress",
                            "convert 64-bit inst addresses to 32-bit inst equivalents",
                            &soo[i].compress_icache_addrs, FALSE,
                            print_flag, NULL, i);

    /* local memory access latency; current mem_lat/mem_nelt are the default */
    op_simple->opt_reg_int_list(odb, "-mem:lat",
                                "memory access latency (<first_chunk> <inter_chunk>)",
                                soo[i].mem_lat, soo[i].mem_nelt,
                                &soo[i].mem_nelt, soo[i].mem_lat,
                                print_flag, NULL,
                                FALSE, i);

    /* memory bus width (bytes) */
    op_simple->opt_reg_int(odb, "-mem:width",
                           "memory access bus width (in bytes)",
                           &soo[i].mem_bus_width, 8,
                           print_flag, NULL, i);

    /* instruction TLB (address translation buffer) configuration */
    op_simple->opt_reg_string(odb, "-tlb:itlb",
                              "instruction TLB config, i.e., {<config>|none}",
                              &soo[i].itlb_opt, "itlb:16:4096:4:l",
                              print_flag, NULL, i);

    /* data TLB configuration */
    op_simple->opt_reg_string(odb, "-tlb:dtlb",
                              "data TLB config, i.e., {<config>|none}",
                              &soo[i].dtlb_opt, "dtlb:32:4096:4:l",
                              print_flag, NULL, i);

    /* latency of a TLB miss */
    op_simple->opt_reg_int(odb, "-tlb:lat",
                           "inst/data TLB miss latency (in cycles)",
                           &soo[i].tlb_miss_lat, 30,
                           print_flag, NULL, i);

    /* functional unit counts: each default is the quantity currently
       stored in the matching fu_config[] entry */

    /* number of integer ALUs */
    op_simple->opt_reg_int(odb, "-res:ialu",
                           "total number of integer ALU's available",
                           &soo[i].res_ialu,
                           soo[i].fu_config[FU_IALU_INDEX].quantity,
                           print_flag, NULL, i);

    /* number of integer multiplier/dividers */
    op_simple->opt_reg_int(odb, "-res:imult",
                           "total number of integer multiplier/dividers available",
                           &soo[i].res_imult,
                           soo[i].fu_config[FU_IMULT_INDEX].quantity,
                           print_flag, NULL, i);

    /* number of local memory access ports */
    op_simple->opt_reg_int(odb, "-res:memport",
                           "total number of local memory system ports available (to CPU)",
                           &soo[i].res_memport,
                           soo[i].fu_config[FU_MEMPORT_INDEX].quantity,
                           print_flag, NULL, i);

    /* number of floating point ALUs */
    op_simple->opt_reg_int(odb, "-res:fpalu",
                           "total number of floating point ALU's available",
                           &soo[i].res_fpalu,
                           soo[i].fu_config[FU_FPALU_INDEX].quantity,
                           print_flag, NULL, i);

    /* number of floating point multiplier/dividers */
    op_simple->opt_reg_int(odb, "-res:fpmult",
                           "total number of floating point multiplier/dividers available",
                           &soo[i].res_fpmult,
                           soo[i].fu_config[FU_FPMULT_INDEX].quantity,
                           print_flag, NULL, i);

    /* number of ports the processor uses to talk to the outside system */
    op_simple->opt_reg_int(odb, "-o",
                           "total number of ports to contact with outside system",
                           &soo[i].outside_port_num,
                           2, print_flag, NULL, i);

    /* Cache flushing around the measured section.  Translated from the
       original comment: an access to 0xd0000000 clears the cache contents
       and an access to 0xd0000100 restores them, so that cache statistics
       restricted to the measured section can be collected.  (The handling
       of these addresses is not in this function.) */
    op_simple->opt_reg_flag(odb, "-cache:flush",
                            "caches are flushed just before particular-time (access 0xd0000000)",
                            &soo[i].cache_flush_flag, TRUE,
                            print_flag, NULL, i);

    /* how statistics are printed; note the target is the system-wide
       output_condition, not a per-PU field */
    op_simple->opt_reg_int(odb, "-output",
                           "print type of statistics {0:not|1:particular-time|2:full-time|3:all}",
                           &output_condition,
                           3, print_flag, NULL, i);
  }
}


/* check simulator-specific option values */
void
isim_system::sim_check_options(struct opt_odb_t *odb,    /* options database */
							   int argc, char **argv)/*command line arguments*/
{
  char name[128], c;
  int nsets, bsize, assoc;

  for ( unsigned int i = 0; i < soo.size(); i++ ) {
  
	if (soo[i].ruu_ifq_size < 1 
		|| (soo[i].ruu_ifq_size & (soo[i].ruu_ifq_size - 1)) != 0)
	  fatal("inst fetch queue size must be positive > 0 and a power of two");
   
	if (soo[i].fetch_speed < 1)
	  fatal("front-end speed must be positive and non-zero");

	if (!mystricmp(soo[i].pred_type, "taken")) {
	  /* static predictor, not taken */
	  soo[i].pred = soo[i].bpred.bpred_create(BPredTaken, 0, 0, 0, 0, 0, 0, 0, 0, 0);
	} else if (!mystricmp(soo[i].pred_type, "nottaken")) {
	  /* static predictor, taken */
	  soo[i].pred = soo[i].bpred.bpred_create(BPredNotTaken, 0, 0, 0, 0, 0, 0, 0, 0, 0);
	} else if (!mystricmp(soo[i].pred_type, "bimod")) {
	  /* bimodal predictor, bpred_create() checks BTB_SIZE */
	  if (soo[i].bimod_nelt != 1)
		fatal("bad bimod predictor config (<table_size>)");
	  if (soo[i].btb_nelt != 2)
		fatal("bad btb config (<num_sets> <associativity>)");
	  
	  /* bimodal predictor, bpred_create() checks BTB_SIZE */
	  soo[i].pred = soo[i].bpred.bpred_create(BPred2bit,
											  /* bimod table size */
											  soo[i].bimod_config[0],
											  /* 2lev l1 size */0,
											  /* 2lev l2 size */0,
											  /* meta table size */0,
											  /* history reg size */0,
											  /* history xor address */0,
											  /* btb sets */
											  soo[i].btb_config[0],
											  /* btb assoc */
											  soo[i].btb_config[1],
											  /* ret-addr stack size */
											  soo[i].ras_size);
	} else if (!mystricmp(soo[i].pred_type, "2lev")) {
	  /* 2-level adaptive predictor, bpred_create() checks args */
	  if (soo[i].twolev_nelt != 4)
		fatal("bad 2-level pred config (<l1size> <l2size> <hist_size> <xor>)");
	  if (soo[i].btb_nelt != 2)
		fatal("bad btb config (<num_sets> <associativity>)");
	  
	  soo[i].pred = soo[i].bpred.bpred_create(BPred2Level,
											  /* bimod table size */0,
											  /* 2lev l1 size */
											  soo[i].twolev_config[0],
											  /* 2lev l2 size */
											  soo[i].twolev_config[1],
											  /* meta table size */0,
											  /* history reg size */
											  soo[i].twolev_config[2],
											  /* history xor address */
											  soo[i].twolev_config[3],
											  /* btb sets */
											  soo[i].btb_config[0],
											  /* btb assoc */
											  soo[i].btb_config[1],
											  /* ret-addr stack size */
											  soo[i].ras_size);
	} else if (!mystricmp(soo[i].pred_type, "comb")) {
	  /* combining predictor, bpred_create() checks args */
	  if (soo[i].twolev_nelt != 4)
		fatal("bad 2-level pred config (<l1size> <l2size> <hist_size> <xor>)");
	  if (soo[i].bimod_nelt != 1)
		fatal("bad bimod predictor config (<table_size>)");
	  if (soo[i].comb_nelt != 1)
		fatal("bad combining predictor config (<meta_table_size>)");
	  if (soo[i].btb_nelt != 2)
		fatal("bad btb config (<num_sets> <associativity>)");
		
	  soo[i].pred = soo[i].bpred.bpred_create(BPredComb,
											  /* bimod table size */
											  soo[i].bimod_config[0],
											  /* l1 size */
											  soo[i].twolev_config[0],
											  /* l2 size */
											  soo[i].twolev_config[1],
											  /* meta table size */
											  soo[i].comb_config[0],
											  /* history reg size */
											  soo[i].twolev_config[2],
											  /* history xor address */
											  soo[i].twolev_config[3],
											  /* btb sets */
											  soo[i].btb_config[0],
											  /* btb assoc */
											  soo[i].btb_config[1],
											  /* ret-addr stack size */
											  soo[i].ras_size);
	}
	else
	  fatal("cannot parse predictor type `%s'", soo[i].pred_type);
	
	if (soo[i].ruu_decode_width < 1
		|| (soo[i].ruu_decode_width & (soo[i].ruu_decode_width-1)) != 0)
	  fatal("issue width must be positive non-zero and a power of two");
	
	if (soo[i].ruu_issue_width < 1
		|| (soo[i].ruu_issue_width & (soo[i].ruu_issue_width-1)) != 0)
	  fatal("issue width must be positive non-zero and a power of two");
	
	if (soo[i].ruu_commit_width < 1)
	  fatal("commit width must be positive non-zero");
	
	if (soo[i].RUU_size < 2 || (soo[i].RUU_size & (soo[i].RUU_size-1)) != 0)
	  fatal("RUU size must be a positive number > 1 and a power of two");
	
	if (soo[i].LSQ_size < 2 || (soo[i].LSQ_size & (soo[i].LSQ_size-1)) != 0)
	  fatal("LSQ size must be a positive number > 1 and a power of two");
	
	/* use a level 1 D-cache? */
	if (!mystricmp(soo[i].cache_dl1_opt, "none")) {
	  soo[i].cache_dl1 = NULL;
	  soo[i].s_cache_dl1 = NULL;
	  
	  /* the level 2 D-cache cannot be defined */
	  if (strcmp(soo[i].cache_dl2_opt, "none"))
		fatal("the l1 data cache must defined if the l2 cache is defined");
	  soo[i].cache_dl2 = NULL;
	  soo[i].s_cache_dl2 = NULL;
	} else /* dl1 is defined */ {
	  if (sscanf(soo[i].cache_dl1_opt, "%[^:]:%d:%d:%d:%c",
				 name, &nsets, &bsize, &assoc, &c) != 5)
		fatal("bad l1 D-cache parms: <name>:<nsets>:<bsize>:<assoc>:<repl>");
	  
	  soo[i].cache_dl1
		= soo[i].cache.cache_create(name, nsets, bsize, 
									/* balloc */FALSE,
									/* usize */0, assoc, 
									soo[i].cache.cache_char2policy(c),
									//dl1_access_fn,
									l1_data_cache,
									/* hit lat */soo[i].cache_dl1_lat);
	  
	  soo[i].s_cache_dl1
		= soo[i].stat_cache.cache_create("pts_dl1", nsets, bsize, 
										 /* balloc */FALSE,
										 /* usize */0, assoc, 
										 soo[i].stat_cache.cache_char2policy(c),
										 //dl1_access_fn,
										 l1_data_cache,
										 /* hit lat */soo[i].cache_dl1_lat);
	  
	  /* is the level 2 D-cache defined? */
	  if (!mystricmp(soo[i].cache_dl2_opt, "none")) {
		soo[i].cache_dl2 = NULL;
		soo[i].s_cache_dl2 = NULL;
	  } else {
		if (sscanf(soo[i].cache_dl2_opt, "%[^:]:%d:%d:%d:%c",
				   name, &nsets, &bsize, &assoc, &c) != 5)
		  fatal("bad l2 D-cache parms: "
				"<name>:<nsets>:<bsize>:<assoc>:<repl>");
		soo[i].cache_dl2
		  = soo[i].cache.cache_create(name, nsets, bsize,
									  /* balloc */FALSE,
									  /* usize */0, assoc,
									  soo[i].cache.cache_char2policy(c),
									  //dl2_access_fn,
									  l2_data_cache,
									  /* hit lat */soo[i].cache_dl2_lat);
		soo[i].s_cache_dl2
		  = soo[i].stat_cache.cache_create("pts_dl2", nsets, bsize,
										   /* balloc */FALSE,
										   /* usize */0, assoc,
										   soo[i].stat_cache.cache_char2policy(c),
										   //dl2_access_fn,
										   l2_data_cache,
										   /* hit lat */soo[i].cache_dl2_lat);
	  }
	}
	
	/* use a level 1 I-cache? */
	if (!mystricmp(soo[i].cache_il1_opt, "none")) {
	  soo[i].cache_il1 = NULL;
	  soo[i].s_cache_il1 = NULL;
	  
	  /* the level 2 I-cache cannot be defined */
	  if (strcmp(soo[i].cache_il2_opt, "none"))
		fatal("the l1 inst cache must defined if the l2 cache is defined");
	  soo[i].cache_il2 = NULL;
	  soo[i].s_cache_il2 = NULL;
	} else if (!mystricmp(soo[i].cache_il1_opt, "dl1")) {
	  if (!soo[i].cache_dl1)
		fatal("I-cache l1 cannot access D-cache l1 as it's undefined");
	  soo[i].cache_il1 = soo[i].cache_dl1;
	  soo[i].s_cache_il1 = soo[i].s_cache_dl1;
	  
	  /* the level 2 I-cache cannot be defined */
	  if (strcmp(soo[i].cache_il2_opt, "none"))
		fatal("the l1 inst cache must defined if the l2 cache is defined");
	  soo[i].cache_il2 = NULL;
	  soo[i].s_cache_il2 = NULL;
	} else if (!mystricmp(soo[i].cache_il1_opt, "dl2")) {
	  if (!soo[i].cache_dl2)
		fatal("I-cache l1 cannot access D-cache l2 as it's undefined");
	  soo[i].cache_il1 = soo[i].cache_dl2;
	  soo[i].s_cache_il1 = soo[i].s_cache_dl2;

	  /* the level 2 I-cache cannot be defined */
	  if (strcmp(soo[i].cache_il2_opt, "none"))
		fatal("the l1 inst cache must defined if the l2 cache is defined");
	  soo[i].cache_il2 = NULL;
	  soo[i].s_cache_il2 = NULL;
	} else /* il1 is defined */ {
	  if (sscanf(soo[i].cache_il1_opt, "%[^:]:%d:%d:%d:%c",
				 name, &nsets, &bsize, &assoc, &c) != 5)
		fatal("bad l1 I-cache parms: <name>:<nsets>:<bsize>:<assoc>:<repl>");
	  soo[i].cache_il1
		= soo[i].cache.cache_create(name, nsets, bsize,
									/* balloc */FALSE,
									/* usize */0, assoc,
									soo[i].cache.cache_char2policy(c),
									//il1_access_fn,
									l1_inst_cache,
									/* hit lat */soo[i].cache_il1_lat);
	  soo[i].s_cache_il1
		= soo[i].stat_cache.cache_create("pts_il1", nsets, bsize,
										 /* balloc */FALSE,
										 /* usize */0, assoc,
										 soo[i].stat_cache.cache_char2policy(c),
										 //il1_access_fn,
										 l1_inst_cache,
										 /* hit lat */soo[i].cache_il1_lat);

	  /* is the level 2 D-cache defined? */
	  if (!mystricmp(soo[i].cache_il2_opt, "none")) {
		soo[i].cache_il2 = NULL;
		soo[i].s_cache_il2 = NULL;
	  } else if (!mystricmp(soo[i].cache_il2_opt, "dl2")) {
		if (!soo[i].cache_dl2)
		  fatal("I-cache l2 cannot access D-cache l2 as it's undefined");
		soo[i].cache_il2 = soo[i].cache_dl2;
		soo[i].s_cache_il2 = soo[i].s_cache_dl2;
	  } else {
		if (sscanf(soo[i].cache_il2_opt, "%[^:]:%d:%d:%d:%c",
				   name, &nsets, &bsize, &assoc, &c) != 5)
		  fatal("bad l2 I-cache parms: "
				"<name>:<nsets>:<bsize>:<assoc>:<repl>");
		soo[i].cache_il2
		  = soo[i].cache.cache_create(name, nsets, bsize,
									  /* balloc */FALSE,
									  /* usize */0, assoc,
									  soo[i].cache.cache_char2policy(c),
									  //il2_access_fn,
									  l2_inst_cache,
									  /* hit lat */soo[i].cache_il2_lat);
		soo[i].s_cache_il2
		  = soo[i].stat_cache.cache_create("pts_il2", nsets, bsize,
										   /* balloc */FALSE,
										   /* usize */0, assoc,
										   soo[i].stat_cache.cache_char2policy(c),
										   //il2_access_fn,
										   l2_inst_cache,
										   /* hit lat */soo[i].cache_il2_lat);
	  }
	}

	/* use an I-TLB? */
	if (!mystricmp(soo[i].itlb_opt, "none")) {
	  soo[i].itlb = NULL;
	  soo[i].s_itlb = NULL;
	} else {
	  if (sscanf(soo[i].itlb_opt, "%[^:]:%d:%d:%d:%c",
				 name, &nsets, &bsize, &assoc, &c) != 5)
		fatal("bad TLB parms: <name>:<nsets>:<page_size>:<assoc>:<repl>");
	  soo[i].itlb
		= soo[i].cache.cache_create(name, nsets, bsize, /* balloc */FALSE,
									/* usize */sizeof(md_addr_t), assoc,
									soo[i].cache.cache_char2policy(c),
									//itlb_access_fn,
									inst_tlb,
									/* hit latency */1);
	  soo[i].s_itlb
		= soo[i].stat_cache.cache_create("pts_itlb", nsets,
										 bsize, /* balloc */FALSE,
										 /* usize */sizeof(md_addr_t), assoc,
										 soo[i].stat_cache.cache_char2policy(c),
										 //itlb_access_fn,
										 inst_tlb,
										 /* hit latency */1);
	}

	/* use a D-TLB? */
	if (!mystricmp(soo[i].dtlb_opt, "none")) {
	  soo[i].dtlb = NULL;
	  soo[i].s_dtlb = NULL;
	} else {
	  if (sscanf(soo[i].dtlb_opt, "%[^:]:%d:%d:%d:%c",
				 name, &nsets, &bsize, &assoc, &c) != 5)
		fatal("bad TLB parms: <name>:<nsets>:<page_size>:<assoc>:<repl>");
	  soo[i].dtlb
		= soo[i].cache.cache_create(name, nsets, bsize, /* balloc */FALSE,
									/* usize */sizeof(md_addr_t), assoc,
									soo[i].cache.cache_char2policy(c),
									//dtlb_access_fn,
									data_tlb,
									/* hit latency */1);
	  soo[i].s_dtlb
		= soo[i].stat_cache.cache_create("pts_dtlb", nsets,
										 bsize, /* balloc */FALSE,
										 /* usize */sizeof(md_addr_t), assoc,
										 soo[i].stat_cache.cache_char2policy(c),
										 //dtlb_access_fn,
										 data_tlb,
										 /* hit latency */1);
	}

	if (soo[i].cache_dl1_lat < 1)
	  fatal("l1 data cache latency must be greater than zero");

	if (soo[i].cache_dl2_lat < 1)
	  fatal("l2 data cache latency must be greater than zero");

	if (soo[i].cache_il1_lat < 1)
	  fatal("l1 instruction cache latency must be greater than zero");

	if (soo[i].cache_il2_lat < 1)
	  fatal("l2 instruction cache latency must be greater than zero");

	if (soo[i].mem_nelt != 2)
	  fatal("bad memory access latency (<first_chunk> <inter_chunk>)");

	if (soo[i].mem_lat[0] < 1 || soo[i].mem_lat[1] < 1)
	  fatal("all memory access latencies must be greater than zero");

	if (soo[i].mem_bus_width < 1
		|| (soo[i].mem_bus_width & (soo[i].mem_bus_width-1)) != 0)
	  fatal("memory bus width must be positive non-zero and a power of two");
  
	if (soo[i].tlb_miss_lat < 1)
	  fatal("TLB miss latency must be greater than zero");

	if (soo[i].res_ialu < 1)
	  fatal("number of integer ALU's must be greater than zero");
	if (soo[i].res_ialu > MAX_INSTS_PER_CLASS)
	  fatal("number of integer ALU's must be <= MAX_INSTS_PER_CLASS");
	soo[i].fu_config[FU_IALU_INDEX].quantity = soo[i].res_ialu;
  
	if (soo[i].res_imult < 1)
	  fatal("number of integer multiplier/dividers must be greater than zero");
	if (soo[i].res_imult > MAX_INSTS_PER_CLASS)
	  fatal("number of integer mult/div's must be <= MAX_INSTS_PER_CLASS");
	soo[i].fu_config[FU_IMULT_INDEX].quantity = soo[i].res_imult;
  
	if (soo[i].res_memport < 1)
	  fatal("number of memory system ports must be greater than zero");
	if (soo[i].res_memport > MAX_INSTS_PER_CLASS)
	  fatal("number of memory system ports must be <= MAX_INSTS_PER_CLASS");
	soo[i].fu_config[FU_MEMPORT_INDEX].quantity = soo[i].res_memport;
  
	if (soo[i].res_fpalu < 1)
	  fatal("number of floating point ALU's must be greater than zero");
	if (soo[i].res_fpalu > MAX_INSTS_PER_CLASS)
	  fatal("number of floating point ALU's must be <= MAX_INSTS_PER_CLASS");
	soo[i].fu_config[FU_FPALU_INDEX].quantity = soo[i].res_fpalu;
  
	if (soo[i].res_fpmult < 1)
	  fatal("number of floating point multiplier/dividers must be > zero");
	if (soo[i].res_fpmult > MAX_INSTS_PER_CLASS)
	  fatal("number of FP mult/div's must be <= MAX_INSTS_PER_CLASS");
	soo[i].fu_config[FU_FPMULT_INDEX].quantity = soo[i].res_fpmult;
  }
}

/* Print simulator-specific configuration information.
 * Currently a no-op: nothing is written to `stream'. */
void
isim_system::sim_aux_config(FILE *stream)            /* output stream */
{
  (void)stream;  /* intentionally unused */
}

/* Register the statistics of one cache/TLB hierarchy with SDB.
 *
 *   ops        object whose cache_reg_stats() performs the registration
 *   il1, il2   instruction caches; each is registered only when it is
 *              non-null and is not the same object as DL1 or DL2, so a
 *              cache shared with the data side is not registered twice
 *   dl1, dl2   data caches, registered when non-null
 *   itlb, dtlb TLBs, registered when non-null
 *   sdb        stats database handed through to cache_reg_stats()
 *
 * The registration order (il1, il2, dl1, dl2, itlb, dtlb) is fixed.
 * Written as a template so that it does not have to name any of the
 * project types involved.
 */
template <class CacheOps, class CachePtr, class TlbPtr, class StatDb>
static void
reg_cache_hierarchy_stats(CacheOps &ops,
                          CachePtr il1, CachePtr il2,
                          CachePtr dl1, CachePtr dl2,
                          TlbPtr itlb, TlbPtr dtlb,
                          StatDb *sdb)
{
  if (il1 && il1 != dl1 && il1 != dl2)
    ops.cache_reg_stats(il1, sdb);
  if (il2 && il2 != dl1 && il2 != dl2)
    ops.cache_reg_stats(il2, sdb);
  if (dl1)
    ops.cache_reg_stats(dl1, sdb);
  if (dl2)
    ops.cache_reg_stats(dl2, sdb);
  if (itlb)
    ops.cache_reg_stats(itlb, sdb);
  if (dtlb)
    ops.cache_reg_stats(dtlb, sdb);
}

/* Register simulator-specific statistics for every processor in `soo`.
 *
 * What is registered depends on output_condition:
 *   0       nothing at all
 *   2 or 3  the "full-time" counters and formulas, plus predictor, loader
 *           and memory stats; cache/TLB stats are included here only when
 *           the processor's cache_flush_flag is clear
 *   3       additionally, when cache_flush_flag is set, the cache/TLB stats
 *           under a "non-particular time" heading
 *   1 or 3  the "particular time" (pts_*) counters and formulas; when
 *           cache_flush_flag is set, also the stats of the s_* caches/TLBs
 *
 * Every processor's section starts with a header comment that receives
 * the address of the processor's puid.
 */
void
isim_system::sim_reg_stats(struct stat_sdb_t *sdb)   /* stats database */
{
  if ( output_condition == 0 )
    return;

  for ( unsigned int i = 0; i < soo.size(); i++ ) {
    soo[i].st_simple->stat_reg_comment(sdb,
                                       "------------ statistics of processor ",
                                       "------------", &soo[i].puid);

    /* ---- full-time statistics ---- */
    if ( output_condition == 2 || output_condition == 3 ) {
      soo[i].st_simple->stat_reg_comment2(sdb,
                                          "             = full-time statistics =");
      soo[i].st_simple->stat_reg_counter(sdb, "load_local_data",
                                         "total of loads executed(local data)",
                                         &soo[i].local_read_access, 0, NULL);
      soo[i].st_simple->stat_reg_counter(sdb, "store_local_data",
                                         "total of stores executed(local data)",
                                         &soo[i].local_write_access, 0, NULL);
      soo[i].st_simple->stat_reg_counter(sdb, "load_shared_data",
                                         "total of loads executed(shared data)",
                                         &soo[i].shared_read_access, 0, NULL);
      soo[i].st_simple->stat_reg_counter(sdb, "store_shared_data",
                                         "total of stores executed(shared data)",
                                         &soo[i].shared_write_access, 0, NULL);
      soo[i].st_simple->stat_reg_counter(sdb, "load_sync_data",
                                         "total of loads executed(sync data)",
                                         &soo[i].sync_read_access, 0, NULL);
      soo[i].st_simple->stat_reg_counter(sdb, "store_sync_data",
                                         "total of stores executed(sync data)",
                                         &soo[i].sync_write_access, 0, NULL);
      soo[i].st_simple->stat_reg_counter(sdb, "other_insts",
                                         "total of executed insts except for loads and stores",
                                         &soo[i].other_inst, 0, NULL);
      soo[i].st_simple->stat_reg_counter(sdb, "sim_total_insn",
                                         "total of instructions dispatched",
                                         &soo[i].sim_total_insn, 0, NULL);
      soo[i].st_simple->stat_reg_counter(sdb, "sim_total_refs",
                                         "total of loads and stores dispatched",
                                         &soo[i].sim_total_refs, 0, NULL);
      soo[i].st_simple->stat_reg_counter(sdb, "sim_total_loads",
                                         "total of loads dispatched",
                                         &soo[i].sim_total_loads, 0, NULL);
      soo[i].st_simple->stat_reg_formula(sdb, "sim_total_stores",
                                         "total of stores dispatched",
                                         "sim_total_refs - sim_total_loads",
                                         NULL);
      soo[i].st_simple->stat_reg_counter(sdb, "sim_total_branches",
                                         "total of branches dispatched",
                                         &soo[i].sim_total_branches, 0, NULL);
      soo[i].st_simple->stat_reg_int(sdb, "sim_elapsed_time",
                                     "total simulation time in seconds",
                                     &sim_elapsed_time, 0, NULL);
      soo[i].st_simple->stat_reg_formula(sdb, "sim_inst_rate",
                                         "simulation speed (in insts/sec)",
                                         "sim_total_insn/sim_elapsed_time",
                                         NULL);

      /* performance stats */
      soo[i].st_simple->stat_reg_counter(sdb, "sim_cycle",
                                         "total simulation time in cycles",
                                         &soo[i].sim_cycle, 0, NULL);
      soo[i].st_simple->stat_reg_formula(sdb, "sim_IPC",
                                         "instructions per cycle",
                                         "sim_total_insn/sim_cycle", NULL);
      soo[i].st_simple->stat_reg_formula(sdb, "sim_CPI",
                                         "cycles per instruction",
                                         "sim_cycle/sim_total_insn", NULL);
      soo[i].st_simple->stat_reg_formula(sdb, "sim_IPB",
                                         "instruction per branch",
                                         "sim_total_insn / sim_total_branches",
                                         NULL);

      /* occupancy stats */
      soo[i].st_simple->stat_reg_counter(sdb, "IFQ_count",
                                         "cumulative IFQ occupancy",
                                         &soo[i].IFQ_count, 0, NULL);
      soo[i].st_simple->stat_reg_counter(sdb, "IFQ_fcount",
                                         "cumulative IFQ full count",
                                         &soo[i].IFQ_fcount, 0, NULL);
      soo[i].st_simple->stat_reg_formula(sdb, "ifq_occupancy",
                                         "avg IFQ occupancy (insn's)",
                                         "IFQ_count / sim_cycle", NULL);
      soo[i].st_simple->stat_reg_formula(sdb, "ifq_rate",
                                         "avg IFQ dispatch rate (insn/cycle)",
                                         "sim_total_insn / sim_cycle", NULL);
      soo[i].st_simple->stat_reg_formula(sdb, "ifq_latency",
                                         "avg IFQ occupant latency (cycle's)",
                                         "ifq_occupancy / ifq_rate", NULL);
      soo[i].st_simple->stat_reg_formula(sdb, "ifq_full",
                                         "fraction of time (cycle's) IFQ was full",
                                         "IFQ_fcount / sim_cycle", NULL);
      soo[i].st_simple->stat_reg_counter(sdb, "RUU_count",
                                         "cumulative RUU occupancy",
                                         &soo[i].RUU_count, 0, NULL);
      soo[i].st_simple->stat_reg_counter(sdb, "RUU_fcount",
                                         "cumulative RUU full count",
                                         &soo[i].RUU_fcount, 0, NULL);
      soo[i].st_simple->stat_reg_formula(sdb, "ruu_occupancy",
                                         "avg RUU occupancy (insn's)",
                                         "RUU_count / sim_cycle", NULL);
      soo[i].st_simple->stat_reg_formula(sdb, "ruu_rate",
                                         "avg RUU dispatch rate (insn/cycle)",
                                         "sim_total_insn / sim_cycle", NULL);
      soo[i].st_simple->stat_reg_formula(sdb, "ruu_latency",
                                         "avg RUU occupant latency (cycle's)",
                                         "ruu_occupancy / ruu_rate", NULL);
      soo[i].st_simple->stat_reg_formula(sdb, "ruu_full",
                                         "fraction of time (cycle's) RUU was full",
                                         "RUU_fcount / sim_cycle", NULL);
      soo[i].st_simple->stat_reg_counter(sdb, "LSQ_count",
                                         "cumulative LSQ occupancy",
                                         &soo[i].LSQ_count, 0, NULL);
      soo[i].st_simple->stat_reg_counter(sdb, "LSQ_fcount",
                                         "cumulative LSQ full count",
                                         &soo[i].LSQ_fcount, 0, NULL);
      soo[i].st_simple->stat_reg_formula(sdb, "lsq_occupancy",
                                         "avg LSQ occupancy (insn's)",
                                         "LSQ_count / sim_cycle", NULL);
      soo[i].st_simple->stat_reg_formula(sdb, "lsq_rate",
                                         "avg LSQ dispatch rate (insn/cycle)",
                                         "sim_total_insn / sim_cycle", NULL);
      soo[i].st_simple->stat_reg_formula(sdb, "lsq_latency",
                                         "avg LSQ occupant latency (cycle's)",
                                         "lsq_occupancy / lsq_rate", NULL);
      soo[i].st_simple->stat_reg_formula(sdb, "lsq_full",
                                         "fraction of time (cycle's) LSQ was full",
                                         "LSQ_fcount / sim_cycle", NULL);

      /* cache stats belong to this section only while no cache flush is
         configured; otherwise they are registered further below */
      if ( !soo[i].cache_flush_flag )
        reg_cache_hierarchy_stats(soo[i].cache,
                                  soo[i].cache_il1, soo[i].cache_il2,
                                  soo[i].cache_dl1, soo[i].cache_dl2,
                                  soo[i].itlb, soo[i].dtlb, sdb);

      /* predictor, loader and memory stats */
      if (soo[i].pred)
        soo[i].bpred.bpred_reg_stats(soo[i].pred, sdb);
      soo[i].load_simple.ld_reg_stats(sdb);
      soo[i].mem_simple.mem_reg_stats(soo[i].mem, sdb);
    }

    /* ---- non-particular time statistics (cache/TLB only) ---- */
    if ( output_condition == 3 && soo[i].cache_flush_flag ) {
      soo[i].st_simple->stat_reg_comment2(sdb,
                                          "            = non-particular time statistics =");
      reg_cache_hierarchy_stats(soo[i].cache,
                                soo[i].cache_il1, soo[i].cache_il2,
                                soo[i].cache_dl1, soo[i].cache_dl2,
                                soo[i].itlb, soo[i].dtlb, sdb);
    }

    /* ---- particular time statistics ---- */
    if ( output_condition == 1 || output_condition == 3 ) {
      soo[i].st_simple->stat_reg_comment2(sdb,
                                          "            = particular time statistics =");
      soo[i].st_simple->stat_reg_counter(sdb, "pts_load_local_data",
                                         "total of loads executed(local data)",
                                         &soo[i].pts_local_read_access,
                                         0, NULL);
      soo[i].st_simple->stat_reg_counter(sdb, "pts_store_local_data",
                                         "total of stores executed(local data)",
                                         &soo[i].pts_local_write_access,
                                         0, NULL);
      soo[i].st_simple->stat_reg_counter(sdb, "pts_load_shared_data",
                                         "total of loads executed(shared data)",
                                         &soo[i].pts_shared_read_access,
                                         0, NULL);
      soo[i].st_simple->stat_reg_counter(sdb, "pts_store_shared_data",
                                         "total of stores executed(shared data)",
                                         &soo[i].pts_shared_write_access,
                                         0, NULL);
      soo[i].st_simple->stat_reg_counter(sdb, "pts_load_sync_data",
                                         "total of loads executed(sync data)",
                                         &soo[i].pts_sync_read_access,
                                         0, NULL);
      soo[i].st_simple->stat_reg_counter(sdb, "pts_store_sync_data",
                                         "total of stores executed(sync data)",
                                         &soo[i].pts_sync_write_access,
                                         0, NULL);
      soo[i].st_simple->stat_reg_counter(sdb, "pts_other_insts",
                                         "total of executed insts except for loads and stores",
                                         &soo[i].pts_other_inst,
                                         0, NULL);
      soo[i].st_simple->stat_reg_counter(sdb, "pts_sim_total_insn",
                                         "total of instructions dispatched",
                                         &soo[i].pts_sim_total_insn,
                                         0, NULL);
      soo[i].st_simple->stat_reg_counter(sdb, "pts_sim_total_refs",
                                         "total of loads and stores dispatched",
                                         &soo[i].pts_sim_total_refs,
                                         0, NULL);
      soo[i].st_simple->stat_reg_counter(sdb, "pts_sim_total_loads",
                                         "total of loads dispatched",
                                         &soo[i].pts_sim_total_loads,
                                         0, NULL);
      soo[i].st_simple->stat_reg_formula(sdb, "pts_sim_total_stores",
                                         "total of stores dispatched",
                                         "pts_sim_total_refs - pts_sim_total_loads",
                                         NULL);
      soo[i].st_simple->stat_reg_counter(sdb, "pts_sim_total_branches",
                                         "total of branches dispatched",
                                         &soo[i].pts_sim_total_branches,
                                         0, NULL);

      /* performance stats */
      /* NOTE(review): pts_sim_cycle is backed by soo[i].clock, not by a
         pts_-prefixed counter like its neighbours -- confirm intended. */
      soo[i].st_simple->stat_reg_counter(sdb, "pts_sim_cycle",
                                         "total simulation time in cycles",
                                         &soo[i].clock, 0, NULL);
      soo[i].st_simple->stat_reg_formula(sdb, "pts_sim_IPC",
                                         "instructions per cycle",
                                         "pts_sim_total_insn/pts_sim_cycle",
                                         NULL);
      soo[i].st_simple->stat_reg_formula(sdb, "pts_sim_CPI",
                                         "cycles per instruction",
                                         "pts_sim_cycle/pts_sim_total_insn",
                                         NULL);
      soo[i].st_simple->stat_reg_formula(sdb, "pts_sim_IPB",
                                         "instruction per branch",
                                         "pts_sim_total_insn / pts_sim_total_branches",
                                         NULL);

      /* occupancy stats */
      soo[i].st_simple->stat_reg_counter(sdb, "pts_IFQ_count",
                                         "cumulative IFQ occupancy",
                                         &soo[i].pts_IFQ_count, 0, NULL);
      soo[i].st_simple->stat_reg_counter(sdb, "pts_IFQ_fcount",
                                         "cumulative IFQ full count",
                                         &soo[i].pts_IFQ_fcount, 0, NULL);
      soo[i].st_simple->stat_reg_formula(sdb, "pts_ifq_occupancy",
                                         "avg IFQ occupancy (insn's)",
                                         "pts_IFQ_count / pts_sim_cycle",
                                         NULL);
      soo[i].st_simple->stat_reg_formula(sdb, "pts_ifq_rate",
                                         "avg IFQ dispatch rate (insn/cycle)",
                                         "pts_sim_total_insn / pts_sim_cycle",
                                         NULL);
      soo[i].st_simple->stat_reg_formula(sdb, "pts_ifq_latency",
                                         "avg IFQ occupant latency (cycle's)",
                                         "pts_ifq_occupancy / pts_ifq_rate",
                                         NULL);
      soo[i].st_simple->stat_reg_formula(sdb, "pts_ifq_full",
                                         "fraction of time (cycle's) IFQ was full",
                                         "pts_IFQ_fcount / pts_sim_cycle",
                                         NULL);
      soo[i].st_simple->stat_reg_counter(sdb, "pts_RUU_count",
                                         "cumulative RUU occupancy",
                                         &soo[i].pts_RUU_count, 0, NULL);
      soo[i].st_simple->stat_reg_counter(sdb, "pts_RUU_fcount",
                                         "cumulative RUU full count",
                                         &soo[i].pts_RUU_fcount, 0, NULL);
      soo[i].st_simple->stat_reg_formula(sdb, "pts_ruu_occupancy",
                                         "avg RUU occupancy (insn's)",
                                         "pts_RUU_count / pts_sim_cycle",
                                         NULL);
      soo[i].st_simple->stat_reg_formula(sdb, "pts_ruu_rate",
                                         "avg RUU dispatch rate (insn/cycle)",
                                         "pts_sim_total_insn / pts_sim_cycle",
                                         NULL);
      soo[i].st_simple->stat_reg_formula(sdb, "pts_ruu_latency",
                                         "avg RUU occupant latency (cycle's)",
                                         "pts_ruu_occupancy / pts_ruu_rate",
                                         NULL);
      soo[i].st_simple->stat_reg_formula(sdb, "pts_ruu_full",
                                         "fraction of time (cycle's) RUU was full",
                                         "pts_RUU_fcount / pts_sim_cycle",
                                         NULL);
      soo[i].st_simple->stat_reg_counter(sdb, "pts_LSQ_count",
                                         "cumulative LSQ occupancy",
                                         &soo[i].pts_LSQ_count, 0, NULL);
      soo[i].st_simple->stat_reg_counter(sdb, "pts_LSQ_fcount",
                                         "cumulative LSQ full count",
                                         &soo[i].pts_LSQ_fcount, 0, NULL);
      soo[i].st_simple->stat_reg_formula(sdb, "pts_lsq_occupancy",
                                         "avg LSQ occupancy (insn's)",
                                         "pts_LSQ_count / pts_sim_cycle",
                                         NULL);
      soo[i].st_simple->stat_reg_formula(sdb, "pts_lsq_rate",
                                         "avg LSQ dispatch rate (insn/cycle)",
                                         "pts_sim_total_insn / pts_sim_cycle",
                                         NULL);
      soo[i].st_simple->stat_reg_formula(sdb, "pts_lsq_latency",
                                         "avg LSQ occupant latency (cycle's)",
                                         "pts_lsq_occupancy / pts_lsq_rate",
                                         NULL);
      soo[i].st_simple->stat_reg_formula(sdb, "pts_lsq_full",
                                         "fraction of time (cycle's) LSQ was full",
                                         "pts_LSQ_fcount / pts_sim_cycle",
                                         NULL);

      /* with a cache flush configured, the s_* caches/TLBs are reported
         in this section through stat_cache */
      if ( soo[i].cache_flush_flag )
        reg_cache_hierarchy_stats(soo[i].stat_cache,
                                  soo[i].s_cache_il1, soo[i].s_cache_il2,
                                  soo[i].s_cache_dl1, soo[i].s_cache_dl2,
                                  soo[i].s_itlb, soo[i].s_dtlb, sdb);
    }
  }
}

/* Initialize the simulator: for every processor in `soo`, initialize its
   register file, then create and initialize its memory space. */
void
isim_system::sim_init(void)
{
  unsigned int pu = 0;

  while ( pu < soo.size() ) {
	/* register file */
	soo[pu].load_simple.regs_init(&soo[pu].regs);

	/* memory space: clear the handle, then create the space named "mem"
	   and initialize it */
	soo[pu].mem = NULL;
	soo[pu].mem = soo[pu].mem_simple.mem_create("mem");
	soo[pu].mem_simple.mem_init(soo[pu].mem);

	pu++;
  }
}

/* Load the program FNAME into the simulated state of every processor in
   `soo`, then finish setting up each processor's simulation engine.

     fname        program to load
     argc, argv   program arguments
     envp         program environment */
void
isim_system::sim_load_prog(char *fname,
						   int argc, char **argv,
						   char **envp)
{
  unsigned int pu = 0;

  while ( pu < soo.size() ) {
	/* program text and data, environment, memory and registers */
	soo[pu].load_simple.ld_load_prog(fname, argc, argv, envp,
									 &soo[pu].regs, soo[pu].mem, TRUE);

	/* build the functional-unit pool from this processor's fu_config
	   table, then initialize the fetch stage, the RUU and the LSQ */
	soo[pu].fu_pool =
	  soo[pu].resrc_simple.res_create_pool("fu-pool",
										   soo[pu].fu_config,
										   N_ELT(soo[pu].fu_config));
	soo[pu].fetch_init();
	soo[pu].ruu_init();
	soo[pu].lsq_init();

	pu++;
  }
}

/* Hook for dumping simulator-specific auxiliary statistics.
   This implementation writes nothing to STREAM. */
void
isim_system::sim_aux_stats(FILE *stream)             /* output stream */
{
  /* intentionally empty: no auxiliary statistics are dumped */
  (void) stream;
}

/* Hook for un-initializing the simulator.
   This implementation performs no teardown. */
void
isim_system::sim_uninit(void)
{
  /* intentionally empty */
}


/* Drive the clock into each module.
 *
 * Runs the simulation in three steps:
 *   1. for every processor in `soo`, set the program entry state and, when
 *      fastfwd_count > 0, execute that many instructions functionally;
 *   2. set up the timing-simulation entry state of every processor;
 *   3. loop forever, clocking the processors, the shared bus and the shared
 *      memory once per iteration.
 * The only exit from the loop in this function is the max_insts check at
 * the bottom of it.
 */
void
isim_system::sim_main(void)
{
  /*ignore any floating point exceptions, they may occur on mis-speculated
	execution paths */
  signal(SIGFPE, SIG_IGN);

  for ( unsigned int i = 0; i < soo.size(); i++ ) {
	/* set up program entry state */
	soo[i].regs.regs_PC = soo[i].load_simple.ld_prog_entry;
	soo[i].regs.regs_NPC = soo[i].regs.regs_PC + sizeof(md_inst_t);
	
	/* fast forward simulator loop, performs functional simulation for
	   FASTFWD_COUNT insts, then turns on performance (timing) simulation */
	if (soo[i].fastfwd_count > 0) {
	  int icount;
	  enum md_opcode op;		/* decoded opcode enum */
	  /* the reference address is kept in soo[i].addr, not in a local */
	  /* NOTE(review): is_write is assigned below but never read in this
		 function -- confirm whether it can be dropped. */
	  int is_write;			/* store? */
	  fprintf(stderr, "sim: ** fast forwarding %d insts **\n",
			  soo[i].fastfwd_count);
	  
	  for (icount=0; icount < soo[i].fastfwd_count; icount++) {
		/* maintain $r0 semantics */
		soo[i].regs.regs_R[MD_REG_ZERO] = 0;
		
		/* get the next instruction to execute */
		soo[i].md_fetch_inst(soo[i].regs.regs_PC);
		
		/* set default reference address */
		soo[i].addr = 0; is_write = FALSE;
		
		/* set default fault - none */
		soo[i].fault = md_fault_none;
		
		/* decode the instruction */
		MD_SET_OPCODE(op, soo[i].inst);
		
		/* execute the instruction: DEFINST is redefined so that including
		   machine.def expands to one `case` per opcode, each calling the
		   processor's <OP>_impl() and then <OP>_reg_impl() */
		switch (op)
		  {
#undef DEFINST
#define DEFINST(OP,MSK,NAME,OPFORM,RES,FLAGS,O1,O2,O3,I1,I2,I3,I4)		\
	    case OP:							\
	      soo[i].SYMCAT(OP,_impl)();						\
	      soo[i].SYMCAT(OP,_reg_impl)();						\
	      break;
#include <isis/machine.def>
		  default:
			panic("attempted to execute a bogus opcode");
		  }
	  
		/* any fault raised by the instruction aborts the simulation */
		if (soo[i].fault != md_fault_none)
		  fatal("fault (%d) detected @ 0x%08p",
				soo[i].fault, soo[i].regs.regs_PC);
		
		/* update memory access stats (only the is_write flag is set here) */
		if (MD_OP_FLAGS(op) & F_MEM) {
		  if (MD_OP_FLAGS(op) & F_STORE)
			is_write = TRUE;
		}
	  
		/* go to the next instruction */
		soo[i].regs.regs_PC = soo[i].regs.regs_NPC;
		soo[i].regs.regs_NPC += sizeof(md_inst_t);
	  }
	}
  }

  fprintf(stderr, "sim: ** starting performance simulation **\n");
  
  for ( unsigned int i = 0; i < soo.size(); i++ ) {
	/* set up timing simulation entry state: the fetch PC and regs_PC are
	   both set one instruction before the current PC, while the predicted
	   fetch PC is the current PC itself */
	soo[i].fetch_regs_PC = soo[i].regs.regs_PC - sizeof(md_inst_t);
	soo[i].fetch_pred_PC = soo[i].regs.regs_PC;
	soo[i].regs.regs_PC = soo[i].regs.regs_PC - sizeof(md_inst_t);
  }



  /*********************************************
  
   v  the part below depends on the system being implemented  v

  **********************************************/
  
  /* main simulator loop, NOTE: the pipe stages are traverse in reverse order
     to eliminate this/next state synchronization and relaxation problems */
  for (;;) {

	for ( unsigned int i = 0; i < soo.size(); i++ )
	  soo[i].clock_out();	/* PU
							   -> ruu_commit();
							   -> ruu_writeback();
							   -> ruu_issue(); */

	shbus.get_packet();	 // shared bus -> receive packets
	shbus.send_packet(); // shared bus -> send packets
	shmem.clock();       /* shared memory
							-> receive packets
							-> memory access
							-> send packets */

	for ( unsigned int i = 0; i < soo.size(); i++ ) {
	  soo[i].clock_in();	/* PU
							   -> ruu_dispatch();
							   -> ruu_fetch();
							   -> get_packet(); */

	  /* go to next cycle */
	  soo[i].sim_cycle++;
	  /* finish early? */
	  /* NOTE(review): this returns as soon as one processor reaches its
		 limit, so processors with a higher index are not clocked in for
		 this cycle -- confirm that is intended. */
	  if (soo[i].max_insts
		  && soo[i].sim_num_insn >= soo[i].max_insts) {
		return;
	  }
	}
  }
}

