/* sim-outorder.c - sample out-of-order issue perf simulator implementation */

/* SimpleScalar(TM) Tool Suite
 * Copyright (C) 1994-2003 by Todd M. Austin, Ph.D. and SimpleScalar, LLC.
 * All Rights Reserved. 
 * 
 * THIS IS A LEGAL DOCUMENT, BY USING SIMPLESCALAR,
 * YOU ARE AGREEING TO THESE TERMS AND CONDITIONS.
 * 
 * No portion of this work may be used by any commercial entity, or for any
 * commercial purpose, without the prior, written permission of SimpleScalar,
 * LLC (info@simplescalar.com). Nonprofit and noncommercial use is permitted
 * as described below.
 * 
 * 1. SimpleScalar is provided AS IS, with no warranty of any kind, express
 * or implied. The user of the program accepts full responsibility for the
 * application of the program and the use of any results.
 * 
 * 2. Nonprofit and noncommercial use is encouraged. SimpleScalar may be
 * downloaded, compiled, executed, copied, and modified solely for nonprofit,
 * educational, noncommercial research, and noncommercial scholarship
 * purposes provided that this notice in its entirety accompanies all copies.
 * Copies of the modified software can be delivered to persons who use it
 * solely for nonprofit, educational, noncommercial research, and
 * noncommercial scholarship purposes provided that this notice in its
 * entirety accompanies all copies.
 * 
 * 3. ALL COMMERCIAL USE, AND ALL USE BY FOR PROFIT ENTITIES, IS EXPRESSLY
 * PROHIBITED WITHOUT A LICENSE FROM SIMPLESCALAR, LLC (info@simplescalar.com).
 * 
 * 4. No nonprofit user may place any restrictions on the use of this software,
 * including as modified by the user, by any other authorized user.
 * 
 * 5. Noncommercial and nonprofit users may distribute copies of SimpleScalar
 * in compiled or executable form as set forth in Section 2, provided that
 * either: (A) it is accompanied by the corresponding machine-readable source
 * code, or (B) it is accompanied by a written offer, with no time limit, to
 * give anyone a machine-readable copy of the corresponding source code in
 * return for reimbursement of the cost of distribution. This written offer
 * must permit verbatim duplication by anyone, or (C) it is distributed by
 * someone who received only the executable form, and is accompanied by a
 * copy of the written offer of source code.
 * 
 * 6. SimpleScalar was developed by Todd M. Austin, Ph.D. The tool suite is
 * currently maintained by SimpleScalar LLC (info@simplescalar.com). US Mail:
 * 2395 Timbercrest Court, Ann Arbor, MI 48105.
 * 
 * Copyright (C) 1994-2003 by Todd M. Austin, Ph.D. and SimpleScalar, LLC.
 */

#include "isim_processor.h"
//#define COUT_PIPELINE
//#define GET_PU_STATISTICS

/* Construct a processor model with its built-in default configuration:
   predictor/BTB table sizes, memory latencies, the five functional unit
   descriptions, and cleared counters and flags.  RUU_size, LSQ_size and
   pcstat_nelt are not assigned here. */
isim_processor::isim_processor()
{
  /* local shorthand: fill operation slot SLOT of functional unit UNIT */
#define ISIM_FU_OP(UNIT, SLOT, KCLASS, OPLAT, ISSUELAT)		\
  do {								\
	fu_config[UNIT].x[SLOT].kclass = KCLASS;			\
	fu_config[UNIT].x[SLOT].oplat = (OPLAT);			\
	fu_config[UNIT].x[SLOT].issuelat = (ISSUELAT);		\
  } while (0)

  /* ---- branch predictor defaults ---- */
  /* two-level predictor */
  twolev_nelt = 4;
  twolev_config[0] = 1;		/* l1size */
  twolev_config[1] = 1024;	/* l2size */
  twolev_config[2] = 8;		/* hist */
  twolev_config[3] = FALSE;	/* xor */
  /* bimodal predictor */
  bimod_nelt = 1;
  bimod_config[0] = 2048;
  /* combining predictor */
  comb_nelt = 1;
  comb_config[0] = 1024;
  /* return address stack */
  ras_size = 8;
  /* branch target buffer */
  btb_nelt = 2;
  btb_config[0] = 512;		/* nsets */
  btb_config[1] = 4;		/* assoc */

  /* ---- memory latency defaults ---- */
  mem_nelt = 2;
  mem_lat[0] = 18;		/* latency to the first chunk */
  mem_lat[1] = 2;		/* latency between the remaining chunks */

  /* ---- functional unit descriptions ---- */
  /* NOTE(review): only the slots listed below are written; confirm that the
     remaining x[] slots of each unit are cleared elsewhere */
  fu_config[0].name = "integer-ALU";
  fu_config[0].quantity = 4;
  fu_config[0].busy = 0;
  ISIM_FU_OP(0, 0, IntALU, 1, 1);

  fu_config[1].name = "integer-MULT/DIV";
  fu_config[1].quantity = 1;
  fu_config[1].busy = 0;
  ISIM_FU_OP(1, 0, IntMULT, 3, 1);
  ISIM_FU_OP(1, 1, IntDIV, 20, 19);

  fu_config[2].name = "memory-port";
  fu_config[2].quantity = 2;
  fu_config[2].busy = 0;
  ISIM_FU_OP(2, 0, RdPort, 1, 1);
  ISIM_FU_OP(2, 1, WrPort, 1, 1);

  fu_config[3].name = "FP-adder";
  fu_config[3].quantity = 4;
  fu_config[3].busy = 0;
  ISIM_FU_OP(3, 0, FloatADD, 2, 1);
  ISIM_FU_OP(3, 1, FloatCMP, 2, 1);
  ISIM_FU_OP(3, 2, FloatCVT, 2, 1);

  fu_config[4].name = "FP-MULT/DIV";
  fu_config[4].quantity = 1;
  fu_config[4].busy = 0;
  ISIM_FU_OP(4, 0, FloatMULT, 4, 1);
  ISIM_FU_OP(4, 1, FloatDIV, 12, 12);
  ISIM_FU_OP(4, 2, FloatSQRT, 24, 24);

#undef ISIM_FU_OP

  /* ---- instruction totals (overall and "pts_" variants) ---- */
  sim_total_insn = 0;
  sim_total_refs = 0;
  sim_total_loads = 0;
  sim_total_branches = 0;
  pts_sim_total_insn = 0;
  pts_sim_total_refs = 0;
  pts_sim_total_loads = 0;
  pts_sim_total_branches = 0;

  /* ---- pipeline state ---- */
  sim_cycle = 0;
  inst_seq = 0;
  ruu_fetch_issue_delay = 0;
  fu_pool = NULL;		/* no resource pool attached yet */
  load_simple.connect_mem(mem_simple);
  branch_seq = 0;

  /* ---- flags and clock ---- */
  load_flag = false;
  clock_flag = false;
  clock = 0;

  /* ---- memory access statistics (overall) ---- */
  local_read_access = 0;
  shared_read_access = 0;
  sync_read_access = 0;
  local_write_access = 0;
  shared_write_access = 0;
  sync_write_access = 0;
  other_inst = 0;

  /* ---- memory access statistics ("pts_" variants) ---- */
  pts_local_read_access = 0;
  pts_shared_read_access = 0;
  pts_sync_read_access = 0;
  pts_local_write_access = 0;
  pts_shared_write_access = 0;
  pts_sync_write_access = 0;
  pts_other_inst = 0;

  sim_num_insn = 0;
  cache_flush_flag = 0;
}

/* Translate an instruction address for I-cache accesses.  When
   compress_icache_addrs is set, the offset of ADR from
   load_simple.ld_text_base is halved; otherwise ADR is returned as is. */
md_addr_t
isim_processor::iacompress(md_addr_t adr)
{
  /* compression disabled: the address is used unchanged */
  if (!compress_icache_addrs)
	return adr;

  /* halve the distance from the text base, then rebase */
  return (((adr - load_simple.ld_text_base) >> 1)
		  + load_simple.ld_text_base);
}

/* Size counterpart of iacompress(): returns SIZE halved when
   compress_icache_addrs is set, and SIZE itself otherwise. */
int
isim_processor::iscompress(size_t size)
{
  return compress_icache_addrs ? (size >> 1) : size;
}

/* Read the current value of a statistic as a counter_t.  Integer and
   unsigned statistics are cast, counter statistics are returned directly;
   any other class is reported through panic(). */
counter_t
isim_processor::statval(struct stat_stat_t *stat)
{
  switch (stat->sc) {
  case sc_int:
	return (counter_t)*(stat->variant.for_int.var);
  case sc_uint:
	return (counter_t)*(stat->variant.for_uint.var);
  case sc_counter:
	return *(stat->variant.for_counter.var);
  default:
	break;
  }

  /* unsupported statistic class */
  panic("bad stat class");
  return 0;
}

/* Latency of a memory access of BLK_SZ bytes (assumed not to cross a page
   boundary).  The block is moved in mem_bus_width sized transfers: the first
   one costs mem_lat[0], each further one costs mem_lat[1]. */
unsigned int
isim_processor::mem_access_latency(int blk_sz)
{
  /* transfers needed, counting a partial trailing transfer as a full one */
  int n_transfers = (blk_sz + (mem_bus_width - 1)) / mem_bus_width;

  assert(n_transfers > 0);

  return (mem_lat[0] + (mem_lat[1] * (n_transfers - 1)));
}

/* reset the register update unit (RUU) occupancy counters, including their
   "pts_" variants; nothing is allocated here */
void
isim_processor::ruu_init(void)
{
  RUU_count = RUU_fcount = 0;
  pts_RUU_count = pts_RUU_fcount = 0;
}

/* reset the load/store queue (LSQ) occupancy counters, including their
   "pts_" variants; nothing is allocated here */
void
isim_processor::lsq_init(void)
{
  LSQ_count = LSQ_fcount = 0;
  pts_LSQ_count = pts_LSQ_fcount = 0;
}

/* Step the BUSY timers of the functional unit pool.  Called once per cycle:
   every resource in fu_pool whose BUSY count is still positive has it
   reduced by one; a unit with a BUSY count above zero cannot be issued an
   operation. */
void
isim_processor::ruu_release_fu(void)
{
  for (int unit = 0; unit < fu_pool->num_resources; ++unit) {
	/* already released, nothing to count down */
	if (!(fu_pool->resources[unit].busy > 0))
	  continue;

	fu_pool->resources[unit].busy--;
  }
}

void
isim_processor::get_data(void)
{
  for ( int g = 0; g < outside_port_num; g++ ) {
	if ( incoming_port[g].have_packet() && incoming_port[g].inst_id() != -1 
		 && incoming_port[g].is_reply()
		 && incoming_port[g].puid() == puid ) {

	  instqueue::INSTList_type* bus_inst;
	  bus_inst = instq.return_inst(incoming_port[g].inst_id());
	  if (bus_inst != NULL) {
		bus_inst->load_data = incoming_port[g].data();
		bus_inst->load_data2 = incoming_port[g].data2();
		bus_inst->load_data_half = incoming_port[g].data_half();
		bus_inst->load_data_byte = incoming_port[g].data_byte();
		bus_inst->load_finish = true;
	  }
	  incoming_port[g].reset_packet();
	}
  }
}

/* commit stage: walks the entries handed back by
   wb_buf.ready_label(sim_cycle) and retires those whose branch is resolved
   (branch_result == 1), as long as commit_num is below ruu_commit_width and
   order_buf.check_order() returns <= -2 (commented below as "no dependence
   between registers"):
   - stores with access_adr <= MACHINE_ADDRESS_BOTTOM (local memory) need a
     functional unit from resrc_simple.res_get(); the data cache and DTLB
     models are then accessed, the opcode's <OP>_reg_impl handler runs and
     the entry is removed from every buffer
   - stores above MACHINE_ADDRESS_BOTTOM run <OP>_reg_impl directly and are
     removed only when exec_stop_flag is still false afterwards
   - every other instruction fetches its results from wb_buf, runs
     <OP>_reg_impl and is removed from order_buf, wb_buf and instq
   NOTE(review): an entry that is not retired stays in wb_buf, so the loop
   relies on ready_label() eventually returning NULL for it -- confirm
   against the wb_buf implementation. */
void
isim_processor::ruu_commit(void)
{
  enum md_opcode op;			/* decoded opcode enum */
  int depend;				/* result of order_buf.check_order() */
  int commit_num = 0;			/* instructions retired in this call */
  struct res_template *fu;
  instqueue::INSTList_type* com_inst;

#ifdef COUT_PIPELINE  
  cout << endl << dec << "@" << sim_cycle << endl;
#endif

  while ( ((com_inst = wb_buf.ready_label(sim_cycle)) != NULL ) ) {

	inst = com_inst->inst;
	MD_SET_OPCODE(op, inst);	

	if ( com_inst->branch_result == 0 ) {
	  //do nothing
	} else if ( com_inst->branch_result == 1 //branch resolved
				&& commit_num < ruu_commit_width ) {
	  regs.regs_R[MD_REG_ZERO] = 0;	

	  //check the dependences between registers
	  int label[D_SIZE], opr[D_SIZE], d_rw[D_SIZE], my_rw[D_SIZE];
	  for (int i = 0; i < D_SIZE; i++ )
		label[i] = opr[i] = d_rw[i] = my_rw[i] = 0;

	  depend = order_buf.check_order(com_inst->seq, com_inst->adr_flag,
									 com_inst->in1, com_inst->in2,
									 com_inst->in3, com_inst->in4,
									 com_inst->out1, com_inst->out2,
									 com_inst->out3, label, opr, d_rw, my_rw);

	  if ( depend <= -2 ) {	 //no dependence between registers
		if ((MD_OP_FLAGS(op) & (F_MEM|F_STORE)) == (F_MEM|F_STORE)){//Store instruction

		  if (com_inst->load_flag) //a Store instruction that nevertheless reads data from memory
			assert(com_inst->load_finish);
		  //pick up the data read from memory
		  load_data = com_inst->load_data;
		  load_data2 = com_inst->load_data2;
		  load_data_half = com_inst->load_data_half;
		  load_data_byte = com_inst->load_data_byte;		  
		  access_adr = com_inst->adr;
		  access_adr2 = com_inst->adr2;
		  /* NOTE(review): both are assigned below but never read in this
			 function */
		  int load_lat, tlb_lat;

		  regs.regs_R[MD_REG_ZERO] = 0;
		  addr_area = com_inst->adr_area;
		  exec_stop_flag = false;
		  assert( access_adr >= 0x10000000 );
		  req_label = com_inst->seq;

		  if ( access_adr <= MACHINE_ADDRESS_BOTTOM ) {//local memory access
			//try to acquire a functional unit
			fu = resrc_simple.res_get(fu_pool, MD_OP_FUCLASS(op));
			if (fu) { //functional unit acquired
			  if (fu->master->busy)
				panic("functional unit already in use");
			  fu->master->busy = fu->issuelat;			  
				
			  load_lat = cache_dl1_lat;
			  /* notify the cache model that a cache access took place;
				 load lat is not used */
			  /* the stat_cache/s_cache_* objects are used instead of
				 cache/cache_* when both clock_flag and cache_flush_flag
				 are set */
			  if ( cache_dl1 ) {
				if ( cache_dl2 ) {
				  if ( clock_flag && cache_flush_flag ) {
					load_lat
					  = stat_cache.cache_access(s_cache_dl1, Write,
												(access_adr & ~3), NULL, 4,
												sim_cycle, NULL, NULL, 0,
												s_cache_dl2,
												mem_bus_width,
												mem_lat[0], mem_lat[1]);
				  } else {
					load_lat
					  = cache.cache_access(cache_dl1, Write,
										   (access_adr & ~3), NULL, 4,
										   sim_cycle, NULL, NULL, 0,
										   cache_dl2,
										   mem_bus_width,
										   mem_lat[0], mem_lat[1]);
				  }
				} else {
				  if ( clock_flag && cache_flush_flag ) {
					load_lat
					  = stat_cache.cache_access(s_cache_dl1, Write,
												(access_adr & ~3), NULL, 4,
												sim_cycle, NULL, NULL, 0, 0,
												mem_bus_width,
												mem_lat[0], mem_lat[1]);
				  } else {
					load_lat
					  = cache.cache_access(cache_dl1, Write,
										   (access_adr & ~3), NULL, 4,
										   sim_cycle, NULL, NULL, 0, 0,
										   mem_bus_width,
										   mem_lat[0], mem_lat[1]);
				  }
				}
			  }
				
			  /* same access against the data TLB model, if one exists */
			  if ( dtlb ) {
				if ( clock_flag && cache_flush_flag ) {
				  tlb_lat
					= stat_cache.cache_access(s_dtlb, Write,
											  (access_adr & ~3),
											  NULL, 4, sim_cycle, NULL, NULL,
											  tlb_miss_lat, 0, mem_bus_width,
											  mem_lat[0], mem_lat[1]);
				} else {
				  tlb_lat
					= cache.cache_access(dtlb, Write, (access_adr & ~3),
										 NULL, 4, sim_cycle, NULL, NULL,
										 tlb_miss_lat, 0, mem_bus_width,
										 mem_lat[0], mem_lat[1]);
				}
			  }
			  
#ifdef COUT_PIPELINE
			  switch (op)
				{
#undef DEFINST
#define DEFINST(OP,MSK,NAME,OPFORM,RES,CLASS,O1,O2,O3,I1,I2,I3,I4)		\
	case OP:	\
      cout << "                                                        COMMIT:" << hex << com_inst->now_PC << " " << NAME << " (" << dec << com_inst->seq << ") or"; \
      if (O1) cout << "[" << O1 << "]"; \
      if (O2) cout << "[" << O2 << "]"; \
      if (O3) cout << "[" << O3 << "]"; \
      if (!O1&&!O2&&!O3) cout << "=nil"; \
      cout << " ir"; \
      if (I1) cout << "[" << I1 << "]"; \
      if (I2) cout << "[" << I2 << "]"; \
      if (I3) cout << "[" << I3 << "]"; \
      if (I4) cout << "[" << I4 << "]"; \
      if (!I1&&!I2&&!I3&&!I4) cout << "=nil"; \
      cout << endl;  \
	  break;
#include "machine.def"
				default:
				  break;
				}
#endif
			  /* run the opcode's <OP>_reg_impl handler; the cases are
				 generated from machine.def through DEFINST */
			  switch (op)
				{
#undef DEFINST
#define DEFINST(OP,MSK,NAME,OPFORM,RES,CLASS,O1,O2,O3,I1,I2,I3,I4)		\
	case OP:	\
      SYMCAT(OP,_reg_impl)(com_inst);  \
	  break;
#include "machine.def"
				default:
				  break;
				}
			  
			  assert(!exec_stop_flag);

			  //if(!exec_stop_flag){//successfully issued to the external access port
			  commit_num++;
			  
			  //count the statistics
			  assert( access_adr <= MACHINE_ADDRESS_BOTTOM );
			  local_write_access++;
			  //else if ( access_adr < SHARED_MEM_TOP )
			  //sync_write_access++;
			  //else if ( access_adr >= SHARED_MEM_TOP )
			  //shared_write_access++;
			  if ( clock_flag ) {
				assert( access_adr <= MACHINE_ADDRESS_BOTTOM );
				pts_local_write_access++;
				//else if ( access_adr < SHARED_MEM_TOP )
				//pts_sync_write_access++;
				//else if ( access_adr >= SHARED_MEM_TOP )
				//pts_shared_write_access++;
			  }
				
			  //discard the instruction
			  wb_buf.deletelist(com_inst->seq);
			  order_buf.deletelist(com_inst->seq);			  
			  addr_order_buf.deletelist(com_inst->seq);
			  exec_buf.deletelist(com_inst->seq);
			  instq.deletelist(com_inst->seq);
			} else { //no functional unit could be acquired
			  //do nothing
			}
		  } else { //access to shared memory
			
#ifdef COUT_PIPELINE
			switch (op)
			  {
#undef DEFINST
#define DEFINST(OP,MSK,NAME,OPFORM,RES,CLASS,O1,O2,O3,I1,I2,I3,I4)		\
	case OP:	\
      cout << "                                                        COMMIT:" << hex << com_inst->now_PC << " " << NAME << " (" << dec << com_inst->seq << ") or"; \
      if (O1) cout << "[" << O1 << "]"; \
      if (O2) cout << "[" << O2 << "]"; \
      if (O3) cout << "[" << O3 << "]"; \
      if (!O1&&!O2&&!O3) cout << "=nil"; \
      cout << " ir"; \
      if (I1) cout << "[" << I1 << "]"; \
      if (I2) cout << "[" << I2 << "]"; \
      if (I3) cout << "[" << I3 << "]"; \
      if (I4) cout << "[" << I4 << "]"; \
      if (!I1&&!I2&&!I3&&!I4) cout << "=nil"; \
      cout << endl;  \
	  break;
#include "machine.def"
			  default:
				break;
			  }
#endif
			/* run the opcode's <OP>_reg_impl handler (cases generated from
			   machine.def) */
			switch (op)
			  {
#undef DEFINST
#define DEFINST(OP,MSK,NAME,OPFORM,RES,CLASS,O1,O2,O3,I1,I2,I3,I4)		\
	case OP:	\
      SYMCAT(OP,_reg_impl)(com_inst);  \
	  break;
#include "machine.def"
			  default:
				break;
			  }
			  
			if(!exec_stop_flag){//successfully issued to the external access port
			  commit_num++;
			  
			  //count the statistics
			  assert( access_adr > MACHINE_ADDRESS_BOTTOM );
			  //local_write_access++;
			  if ( access_adr < SHARED_MEM_TOP )
				sync_write_access++;
			  else if ( access_adr >= SHARED_MEM_TOP )
				shared_write_access++;
			  if ( clock_flag ) {
				assert( access_adr > MACHINE_ADDRESS_BOTTOM );
				//pts_local_write_access++;
				if ( access_adr < SHARED_MEM_TOP )
				  pts_sync_write_access++;
				else if ( access_adr >= SHARED_MEM_TOP )
				  pts_shared_write_access++;
			  }
			  
			  //discard the instruction
			  wb_buf.deletelist(com_inst->seq);
			  order_buf.deletelist(com_inst->seq);			  
			  addr_order_buf.deletelist(com_inst->seq);
			  exec_buf.deletelist(com_inst->seq);
			  instq.deletelist(com_inst->seq);
			}
		  }
		} else { //not a Store instruction
		  order_buf.deletelist(com_inst->seq);
		  result1 = result2 = 0;
		  result_i = 0;
		  result_ff = result_fd = 0;
		  access_adr = com_inst->adr;
		  access_adr2 = com_inst->adr2;
		  //fetch this instruction's own execution result from the reorder buffer
		  wb_buf.get_element(com_inst->seq, &result1, &result2,
							 &result_i, &result_ff, &result_fd);
		  wb_buf.deletelist(com_inst->seq);
		  commit_num++;

#ifdef COUT_PIPELINE  
		  switch (op)
			{
#undef DEFINST
#define DEFINST(OP,MSK,NAME,OPFORM,RES,CLASS,O1,O2,O3,I1,I2,I3,I4)		\
	case OP:	\
      cout << "                                                        COMMIT:" << hex << com_inst->now_PC << " " << NAME << " (" << dec << com_inst->seq << ") or"; \
      if (O1) cout << "[" << O1 << "]"; \
      if (O2) cout << "[" << O2 << "]"; \
      if (O3) cout << "[" << O3 << "]"; \
      if (!O1&&!O2&&!O3) cout << "=nil"; \
      cout << " ir"; \
      if (I1) cout << "[" << I1 << "]"; \
      if (I2) cout << "[" << I2 << "]"; \
      if (I3) cout << "[" << I3 << "]"; \
      if (I4) cout << "[" << I4 << "]"; \
      if (!I1&&!I2&&!I3&&!I4) cout << "=nil"; \
      cout << endl;  \
	  break;
#include "machine.def"
			default:
			  break;
			}
#endif
		  /* run the opcode's <OP>_reg_impl handler (cases generated from
			 machine.def) */
		  switch (op)
			{
#undef DEFINST
#define DEFINST(OP,MSK,NAME,OPFORM,RES,CLASS,O1,O2,O3,I1,I2,I3,I4)		\
	case OP:	\
      SYMCAT(OP,_reg_impl)(com_inst);  \
	  break;
#include "machine.def"
			default:
			  break;
			}
		  //discard the instruction
		  instq.deletelist(com_inst->seq);
		}
	  }
	}
  }
}

/* writeback stage: processes every entry returned by
   exec_buf.ready_label(sim_cycle).  An entry is handled once its exec_time
   has been reached and, if load_flag is set, load_finish is set as well;
   entries with an unresolved branch (branch_result == 0) are handled only
   when they are not control instructions.  For a handled entry the register
   dependences are checked through order_buf.check_order(), dependent source
   values are read from wb_buf, and the opcode's <OP>_impl handler runs:
   - stores: on success the entry leaves exec_buf and its data goes to
     addr_order_buf
   - branches: the result is inserted into wb_buf (or the entry is dropped
     when no register is updated), the branch predictor is updated, and on a
     misprediction fetch is redirected and every entry after the branch is
     deleted from the buffers
   - everything else: the result is inserted into wb_buf
   NOTE(review): an entry that is not removed from exec_buf stays there, so
   the loop relies on ready_label() eventually returning NULL for it --
   confirm against the exec_buf implementation. */
void
isim_processor::ruu_writeback(void)
{
  enum md_opcode op;/* decoded opcode enum */
  /* effective address, if load/store */
  int depend;
  md_addr_t branch_PC, next_PC;
  struct res_template *fu;
  instqueue::INSTList_type* wb_inst;
  instqueue::INSTList_type* temp_inst; 

  while ( (wb_inst = exec_buf.ready_label(sim_cycle)) != NULL ) {

	assert(wb_inst != NULL);
	inst = wb_inst->inst;
  	MD_SET_OPCODE(op, inst);
	
	if ( wb_inst->branch_result == 1
		 && wb_inst->exec_time > sim_cycle ) {
	  //do nothing
	} else if ( ( wb_inst->branch_result == 1 //branch resolved
				  && wb_inst->exec_time <= sim_cycle //execution latency satisfied
				  && (wb_inst->load_flag == false  //not a Load instruction
					  // a Load instruction whose data is ready
					  || ( wb_inst->load_flag && wb_inst->load_finish )))
				|| ( wb_inst->branch_result == 0 //branch not yet resolved
					 && wb_inst->exec_time <= sim_cycle //execution latency satisfied
				  && !(MD_OP_FLAGS(op) & F_CTRL) //not a branch instruction
				  &&  ( wb_inst->load_flag == false //not a Load instruction
						//a Load instruction whose data is ready
						|| (wb_inst->load_flag && wb_inst->load_finish)))){
	  
	  regs.regs_R[MD_REG_ZERO] = 0;
	  access_adr = wb_inst->adr;
	  access_adr2 = wb_inst->adr2;	
	  int label[D_SIZE], opr[D_SIZE],d_rw[D_SIZE], my_rw[D_SIZE];
	
	  for (int i = 0; i < D_SIZE; i++ )
		label[i] = opr[i] = d_rw[i] = my_rw[i] = 0;
	  //start of the register dependence check
	  depend = order_buf.check_order(wb_inst->seq, wb_inst->adr_flag,
									 wb_inst->in1, wb_inst->in2,
									 wb_inst->in3, wb_inst->in4,
									 wb_inst->out1, wb_inst->out2,
									 wb_inst->out3, label, opr, d_rw, my_rw);

	  //check whether the dependent data can be read from the reorder buffer
	  /* e_flag: 0 = no entry with d_rw == 1 and my_rw == 0 was seen,
		 2 = every such entry is present in wb_buf,
		 -1 = at least one of them is missing from wb_buf */
	  int e_flag = 0;
	  for ( int a = 0; a < depend; a++ ) {
		if ( d_rw[a] == 1 && my_rw[a] == 0 ) {
		  if ( e_flag != -1 )
			e_flag = 2;
		}
		if ( ( d_rw[a] == 1 && my_rw[a] == 0 ) 
			 && !wb_buf.is(label[a]) ) { //the dependent data is not in the reorder buffer
		  e_flag = -1; //cannot execute yet
		  break;
		}
	  }
	  
	  in1_flag = in2_flag = in3_flag = in4_flag = false;		

	  if ( e_flag == 2 ) { //the dependent data is in the reorder buffer
		/* clear the per-source slots 1..4 before they are filled */
		for(int a = 1; a <=4; a++ ) {
		  s_i[a] = s1[a] = s2[a] = 0;
		  s_ff[a] = s_fd[a] = 0;
		  s_i_flag[a] = s_ff_flag[a]
			= s_fd_flag[a] = s1_flag[a] = s2_flag[a] = false;
		}
		//fetch the data that read register 1 depends on from the reorder buffer
		if ( wb_inst->in1 != 0 ) {
		  for ( int a = 0; a < depend; a++ ) {
			if ( wb_inst->in1 == opr[a] && d_rw[a] == 1 ) {
			  wb_buf.get_data(opr[a], label[a],
							  &s1_flag[1], &s1[1], &s2_flag[1], &s2[1],
							  &s_i_flag[1], &s_i[1], &s_ff_flag[1], &s_ff[1],
							  &s_fd_flag[1], &s_fd[1], sim_cycle);
			  in1_flag = true;
			  break;
			}
		  }
		}
		//fetch the data that read register 2 depends on from the reorder buffer
		if ( wb_inst->in2 != 0 ) {
		  for ( int a = 0; a < depend; a++ ) {
			if ( wb_inst->in2 == opr[a] && d_rw[a] == 1 ) {
			  wb_buf.get_data(opr[a], label[a], 
							  &s1_flag[2], &s1[2], &s2_flag[2], &s2[2],
							  &s_i_flag[2], &s_i[2], &s_ff_flag[2], &s_ff[2],
							  &s_fd_flag[2], &s_fd[2], sim_cycle);
			  in2_flag = true;
			  break;
			}
		  }
		}
		//fetch the data that read register 3 depends on from the reorder buffer
		if ( wb_inst->in3 != 0 ) {
		  for ( int a = 0; a < depend; a++ ) {
			if ( wb_inst->in3 == opr[a] && d_rw[a] == 1 ) {
			  wb_buf.get_data(opr[a], label[a], 
							  &s1_flag[3], &s1[3], &s2_flag[3], &s2[3],
							  &s_i_flag[3], &s_i[3], &s_ff_flag[3], &s_ff[3],
							  &s_fd_flag[3], &s_fd[3], sim_cycle);
			  in3_flag = true;
			  break;
			}
		  }
		}
		//fetch the data that read register 4 depends on from the reorder buffer
		if ( wb_inst->in4 != 0 ) {
		  for ( int a = 0; a < depend; a++ ) {
			if ( wb_inst->in4 == opr[a] && d_rw[a] == 1 ) {
			  wb_buf.get_data(opr[a], label[a], 
							  &s1_flag[4], &s1[4], &s2_flag[4], &s2[4],
							  &s_i_flag[4], &s_i[4], &s_ff_flag[4], &s_ff[4],
							  &s_fd_flag[4], &s_fd[4], sim_cycle);
			  in4_flag = true;
			  break;
			}
		  }
		}
	  }
	  //end of the register dependence check

	  if ( e_flag >= 0 //the dependent data could be obtained from the reorder buffer
		   || depend < 0 ) { //no dependence between registers
		if ((MD_OP_FLAGS(op)&(F_MEM|F_STORE)) == (F_MEM|F_STORE)) {//Store instruction

		  result1 = result2 = 0;
		  result_i = 0;
		  result_ff = result_fd = 0;
		  exec_stop_flag = false;
		  
		  /* run the opcode's <OP>_impl handler; the cases are generated
			 from machine.def through DEFINST */
		  switch (op)
			{
#undef DEFINST
#define DEFINST(OP,MSK,NAME,OPFORM,RES,CLASS,O1,O2,O3,I1,I2,I3,I4)		\
	        case OP:	\
              SYMCAT(OP,_impl)(wb_inst);  \
	          break;
#include "machine.def"
			default:
			  break;
			}

		  if ( !exec_stop_flag ) { //successfully issued to the external access port
			exec_buf.deletelist(wb_inst->seq);
			addr_order_buf.insert_store_data(wb_inst->seq, result1, result2);
		  }
		} else if ((MD_OP_FLAGS(op) & F_CTRL)) {//branch instruction
		  exec_buf.deletelist(wb_inst->seq);
		  branch_PC = wb_inst->prev_branch_PC;
		  next_PC = wb_inst->next_PC;
		  regs.regs_PC = wb_inst->now_PC; 
		  /* default to fall-through; compared with pred_PC after the
			 handler below has run */
		  regs.regs_NPC = regs.regs_PC + sizeof(md_inst_t);		  
		  result1 = result2 = result_i = 0;
		  result_ff = result_fd = 0;
		  result1_flag = result2_flag = result_i_flag = false;
		  result_ff_flag = result_fd_flag = false;
		  exec_stop_flag = false;
		  
#ifdef COUT_PIPELINE  
		  switch (op)
			{
#undef DEFINST
#define DEFINST(OP,MSK,NAME,OPFORM,RES,CLASS,O1,O2,O3,I1,I2,I3,I4)		\
	case OP:	\
      cout << "                                      WRITBK:" << hex << wb_inst->now_PC << " " << NAME << " (" << dec << wb_inst->seq << ") or"; \
      if (O1) cout << "[" << O1 << "]"; \
      if (O2) cout << "[" << O2 << "]"; \
      if (O3) cout << "[" << O3 << "]"; \
      if (!O1&&!O2&&!O3) cout << "=nil"; \
      cout << " ir"; \
      if (I1) cout << "[" << I1 << "]"; \
      if (I2) cout << "[" << I2 << "]"; \
      if (I3) cout << "[" << I3 << "]"; \
      if (I4) cout << "[" << I4 << "]"; \
      if (!I1&&!I2&&!I3&&!I4) cout << "=nil"; \
      cout << endl;  \
	  break;
#include "machine.def"
			default:
			  break;
			}
#endif
		
		  /* run the opcode's <OP>_impl handler (cases generated from
			 machine.def) */
		  switch (op)
			{
#undef DEFINST
#define DEFINST(OP,MSK,NAME,OPFORM,RES,CLASS,O1,O2,O3,I1,I2,I3,I4)		\
	case OP:	\
      SYMCAT(OP,_impl)(wb_inst);  \
	  break;
#include "machine.def" 
			default:
			  break;
			}

		  assert(!exec_stop_flag);
		  
		  if ( result1_flag ) { //there is a register to update
			/* NOTE(review): tmp_register has two elements, but r reaches 2
			   when out1, out2 and out3 are all non-zero, which would write
			   tmp_register[2] -- confirm no opcode has three outputs */
			int tmp_register[2];
			tmp_register[0] = tmp_register[1] = 0;
			int r = 0;
			
			//start of the preparation for inserting the result into the reorder buffer
			if (wb_inst->out1) {
			  tmp_register[r] = wb_inst->out1;
			  r++;
			}
			if (wb_inst->out2) {
			  tmp_register[r] = wb_inst->out2;
			  r++;
			}
			if (wb_inst->out3) {
			  tmp_register[r] = wb_inst->out3;
			  r++;
			}
			//end of the preparation for inserting the result into the reorder buffer

			//add the execution result to the reorder buffer
			wb_buf.insertlist(wb_inst, tmp_register[0], tmp_register[1],
							  result1_flag, result1, result2_flag, result2,
							  result_i_flag, result_i, result_ff_flag,
							  result_ff, result_fd_flag, result_fd, sim_cycle);
		  } else { //there is no register to update
			order_buf.deletelist(wb_inst->seq);
			instq.deletelist(wb_inst->seq);
		  }
		
		  if ( regs.regs_NPC != wb_inst->pred_PC ) { //branch misprediction
#ifdef COUT_PIPELINE  
			cout << "---bad " << hex << regs.regs_NPC << dec << " " << sim_cycle << endl;
#endif
			instq.set_bad_branch(wb_inst->seq);
			/* redirect fetch to the resolved target and empty the fetch
			   queue indices */
			fetch_pred_PC = fetch_regs_PC = regs.regs_NPC;
			fetch_head = fetch_num = fetch_tail = 0;		
			prev_branch_PC = 0;
			branch_seq = 0;
			//update the branch predictor
			bpred.bpred_update(pred,
							   wb_inst->now_PC,
							   regs.regs_NPC,
							   regs.regs_NPC != (wb_inst->now_PC
												 + sizeof(md_inst_t)),
							   wb_inst->pred_PC
							   != (wb_inst->now_PC + sizeof(md_inst_t)),
							   wb_inst->pred_PC == regs.regs_NPC,
							   op,
							   wb_inst->dir);
			//start discarding the instructions that followed the mispredicted branch
			exec_buf.deletelist_after_this(wb_inst->seq);
			wb_buf.deletelist_after_this(wb_inst->seq);
			order_buf.deletelist_after_this(wb_inst->seq);
			addr_order_buf.deletelist_after_this(wb_inst->seq);
			while ((temp_inst = instq.return_after_this(wb_inst->seq))!= NULL){
			  /* an entry whose exec_time is still ahead of sim_cycle has
				 its functional unit's busy count cleared */
			  if ( (temp_inst->exec_time - sim_cycle ) > 0 ) {
				fu = temp_inst->fu;
				if (fu) fu->master->busy = 0;
			  }
			  instq.deletelist(temp_inst->seq);
			}
			//end of discarding the instructions that followed the mispredicted branch
		  } else { //branch prediction was correct
#ifdef COUT_PIPELINE  
			cout << "---goo " << hex << regs.regs_NPC << dec << " " << sim_cycle << endl;
#endif
			instq.set_good_branch(wb_inst->seq);
			if ( branch_seq == wb_inst->seq )
			  branch_seq = 0;
			//update the branch predictor
			bpred.bpred_update(pred,
							   wb_inst->now_PC,
							   regs.regs_NPC,
							   regs.regs_NPC != (wb_inst->now_PC
												 + sizeof(md_inst_t)),
							   wb_inst->pred_PC
							   != (wb_inst->now_PC
								   + sizeof(md_inst_t)),
							   wb_inst->pred_PC == regs.regs_NPC,
							   op,
							   wb_inst->dir);
		  }
		} else {  // neither a Store instruction nor a branch instruction
		  // not  (MD_OP_FLAGS(op)&(F_MEM|F_STORE)) == (F_MEM|F_STORE)) &&
		  // not ((MD_OP_FLAGS(op) & F_CTRL))
		  result1 = result2 = result_i = 0;
		  result_ff = result_fd = 0;
		  result1_flag = result2_flag = result_i_flag = false;
		  result_ff_flag = result_fd_flag = false;
		  exec_stop_flag = false;
		  /* set once the opcode handler has been run; stays false for a
			 load whose address is below 0x10000000 */
		  bool exec = false;
	  
		  if ((MD_OP_FLAGS(op) & (F_MEM|F_LOAD)) == (F_MEM|F_LOAD)){//Load instruction
			if (access_adr >= 0x10000000 ) { //address inside the data region
			  exec = true;
			  //pick up the data that was read from memory
			  load_data = wb_inst->load_data;
			  load_data2 = wb_inst->load_data2;
			  load_data_half = wb_inst->load_data_half;
			  load_data_byte = wb_inst->load_data_byte;
			  load_flag = wb_inst->load_flag;
			  /* pick up the data forwarded from an earlier Store
				 instruction that accesses the same address */
			  lsq_data = wb_inst->lsq_data;
			  lsq_data_flag = wb_inst->lsq_data_flag;
			  /* for a shared memory access, the data can always be
				 obtained from one of the two sources above */
#ifdef COUT_PIPELINE  
			  switch (op)
				{
#undef DEFINST
#define DEFINST(OP,MSK,NAME,OPFORM,RES,CLASS,O1,O2,O3,I1,I2,I3,I4)		\
	case OP:	\
      cout << "                                      WRITBK:" << hex << wb_inst->now_PC << " " << NAME << " (" << dec << wb_inst->seq << ") or"; \
      if (O1) cout << "[" << O1 << "]"; \
      if (O2) cout << "[" << O2 << "]"; \
      if (O3) cout << "[" << O3 << "]"; \
      if (!O1&&!O2&&!O3) cout << "=nil"; \
      cout << " ir"; \
      if (I1) cout << "[" << I1 << "]"; \
      if (I2) cout << "[" << I2 << "]"; \
      if (I3) cout << "[" << I3 << "]"; \
      if (I4) cout << "[" << I4 << "]"; \
      if (!I1&&!I2&&!I3&&!I4) cout << "=nil"; \
      cout << endl;  \
	  break;
#include "machine.def"
				default:
				  break;
				}
#endif
			  /* run the opcode's <OP>_impl handler (cases generated from
				 machine.def) */
			  switch (op)
				{
#undef DEFINST
#define DEFINST(OP,MSK,NAME,OPFORM,RES,CLASS,O1,O2,O3,I1,I2,I3,I4)		\
	case OP:	\
      SYMCAT(OP,_impl)(wb_inst);  \
	  break;
#include "machine.def" 
				default:
				  break;
				}
			}   
		  } else { //not a Load instruction
			exec = true;
#ifdef COUT_PIPELINE  
			switch (op)
			  {
#undef DEFINST
#define DEFINST(OP,MSK,NAME,OPFORM,RES,CLASS,O1,O2,O3,I1,I2,I3,I4)		\
	case OP:	\
      cout << "                                      WRITBK:" << hex << wb_inst->now_PC << " " << NAME << " (" << dec << wb_inst->seq << ") or";  \
      if (O1) cout << "[" << O1 << "]"; \
      if (O2) cout << "[" << O2 << "]"; \
      if (O3) cout << "[" << O3 << "]"; \
      if (!O1&&!O2&&!O3) cout << "=nil"; \
      cout << " ir"; \
      if (I1) cout << "[" << I1 << "]"; \
      if (I2) cout << "[" << I2 << "]"; \
      if (I3) cout << "[" << I3 << "]"; \
      if (I4) cout << "[" << I4 << "]"; \
      if (!I1&&!I2&&!I3&&!I4) cout << "=nil"; \
      cout << endl;  \
	  break;
#include "machine.def"
			  default:
				break;
			  }
#endif
			/* run the opcode's <OP>_impl handler (cases generated from
			   machine.def) */
			switch (op)
			  {
#undef DEFINST
#define DEFINST(OP,MSK,NAME,OPFORM,RES,CLASS,O1,O2,O3,I1,I2,I3,I4)		\
	case OP:	\
      SYMCAT(OP,_impl)(wb_inst);  \
	  break;
#include "machine.def" 
			  default:
				break;
			  }
		  }

		  if ( !exec_stop_flag //successfully issued to the external access port
			   && exec ) { //neither a Store instruction nor a branch instruction
			exec_buf.deletelist(wb_inst->seq);		  
			//start of the preparation for inserting the result into the reorder buffer
			/* NOTE(review): tmp_register has two elements, but r reaches 2
			   when out1, out2 and out3 are all non-zero, which would write
			   tmp_register[2] -- confirm no opcode has three outputs */
			int tmp_register[2], r;
			tmp_register[0] = tmp_register[1] = r = 0;
			if (wb_inst->out1) {
			  tmp_register[r] = wb_inst->out1;
			  r++;
			}
			if (wb_inst->out2) {
			  tmp_register[r] = wb_inst->out2;
			  r++;
			}
			if (wb_inst->out3) {
			  tmp_register[r] = wb_inst->out3;
			  r++;
			}
			//end of the preparation for inserting the result into the reorder buffer

			//insert the execution result into the reorder buffer
			wb_buf.insertlist(wb_inst, tmp_register[0], tmp_register[1],
							  result1_flag, result1, result2_flag, result2,
							  result_i_flag, result_i, result_ff_flag,
							  result_ff, result_fd_flag, result_fd, sim_cycle);
			/* a completed load no longer needs its address-ordering entry */
			if ((MD_OP_FLAGS(op) & (F_MEM|F_LOAD)) == (F_MEM|F_LOAD))
			  addr_order_buf.deletelist(wb_inst->seq);
		  }
		}
	  }
	}
  }
}

/*
 * ruu_issue() - instruction issue stage.
 *
 * Repeatedly asks the instruction queue (instq) for a reservation-station
 * entry, for as long as fewer than ruu_issue_width instructions have been
 * issued in this call and an entry is returned.  For each entry it either
 *   - discards it when its computed address is non-zero and below 0x10000000,
 *   - executes the address-generation half of a load/store and forwards the
 *     result to the following entry (rvs_inst->next),
 *   - places a store directly into exec_buf / wb_buf, or
 *   - issues it: shared/sync/TAS/FAD accesses go through the per-opcode
 *     _set_req_impl handler, all other instructions need a functional unit
 *     (when MD_OP_FUCLASS(op) != NA) and are inserted into exec_buf with
 *     their completion cycle in exec_time.
 */
void
isim_processor::ruu_issue(void)
{
  int load_lat, tlb_lat, depend, issue_num = 0;
  struct res_template *fu;
  enum md_opcode op;			/* decoded opcode enum */
  bool addr_ok, issue_flag;  
  instqueue::INSTList_type* rvs_inst;
  
  while ( issue_num < ruu_issue_width && 
		  ( rvs_inst = instq.return_reservation_station_inst(sim_cycle))
		  != NULL ) {	
	
	lsq_data_flag = false;
	inst = rvs_inst->inst;
  	MD_SET_OPCODE(op, inst);
	issue_flag = false;
	
	/* entries handed out by the reservation station must not have been
	   scheduled yet (exec_time is still negative) */
	assert(rvs_inst->exec_time < 0);
	
	if (rvs_inst->adr != 0  // memory access address has already been computed
		&& rvs_inst->adr < 0x10000000 ) { // address is outside the valid region
	  // discard the instruction
	  rvs_inst->exec_ok = false;
	  order_buf.deletelist(rvs_inst->seq);
	  addr_order_buf.deletelist(rvs_inst->seq);
	  instq.deletelist(rvs_inst->seq);
	} else if
	  (!(MD_OP_FLAGS(op) & F_MEM) // not a memory access instruction
	   || ((MD_OP_FLAGS(op) & F_MEM) // memory access instruction
		   /* either there is no dependence on the memory access address,
			 or there is one and the data can be obtained from the LSQ */
		   && ( addr_order_buf.checklist(rvs_inst->seq, rvs_inst->adr,
										 rvs_inst->adr2, rvs_inst->adr_area,
										 rvs_inst->l_or_s, &lsq_data,
										 &lsq_data2, &lsq_data_flag,
										 rvs_inst->use_lsq_flag)))) {
	  if ( lsq_data_flag ) { // data was obtained from the LSQ
		rvs_inst->lsq_data = lsq_data;
		rvs_inst->lsq_data2 = lsq_data2;
		rvs_inst->lsq_data_h = lsq_data_half;
		rvs_inst->lsq_data_b = lsq_data_byte;
		rvs_inst->lsq_data_flag = lsq_data_flag;
	  }
	  
	  addr_ok = true;
	  /* check that at least one clock has elapsed since the
		memory access address was computed */
	  if ((MD_OP_FLAGS(op) & F_MEM)
		  && !(rvs_inst->adr_flag)
		  && ( rvs_inst->adr == 0 || rvs_inst->calc_time == sim_cycle )) {
		addr_ok = false;
	  }
	  
	  if (addr_ok) {// at least one clock has elapsed since the address was computed

		// begin register dependence check
		int label[D_SIZE], opr[D_SIZE], d_rw[D_SIZE], my_rw[D_SIZE];
		for (int i = 0; i < D_SIZE; i++ )
		  label[i] = opr[i] = d_rw[i] = my_rw[i] = 0;
		depend = order_buf.check_order(rvs_inst->seq, rvs_inst->adr_flag,
									   rvs_inst->in1, rvs_inst->in2,
									   rvs_inst->in3, rvs_inst->in4,
									   rvs_inst->out1, rvs_inst->out2,
									   rvs_inst->out3, label,
									   opr, d_rw, my_rw);	  
		
		// check whether the dependent data can be read from the reorder buffer
		/* e_flag: 1 = nothing to forward, 2 = forwarding needed and so far
		   available, -1 = a producer's result is not in wb_buf yet */
		int e_flag = 1;
		for ( int a = 0; a < depend; a++ ) {
		  if ( d_rw[a] == 1 && my_rw[a] == 0 ) {
			if ( e_flag != -1 )
			  e_flag = 2;
		  }
		  if ( ( d_rw[a] == 1 && my_rw[a] == 0 ) 
			   && !wb_buf.is(label[a]) ) { // dependent data is not in the reorder buffer
			e_flag = -1; // cannot execute yet
			break;
		  }
		}
		
		in1_flag = in2_flag = in3_flag = in4_flag = false;		
		
		if ( rvs_inst->adr_flag ) {// instruction that computes a memory access address
		  if ( e_flag == 2 ) { // dependent data is in the reorder buffer
			/* clear the forwarded-operand slots 1..4 before filling them */
			for(int a = 1; a <=4; a++ ) {
			  s_i[a] = s1[a] = s2[a] = 0;
			  s_ff[a] = s_fd[a] = 0;
			  s_i_flag[a] = s_ff_flag[a]
				= s_fd_flag[a] = s1_flag[a] = s2_flag[a] = false;
			}
			// fetch the data that read register 2 depends on from the reorder buffer
			if ( rvs_inst->in2 != 0 ) {
			  for ( int a = 0; a < depend; a++ ) {
				if ( rvs_inst->in2 == opr[a] && d_rw[a] == 1 ) {
				  wb_buf.get_data(opr[a], label[a], &s1_flag[2], &s1[2],
								  &s2_flag[2], &s2[2], &s_i_flag[2], &s_i[2],
								  &s_ff_flag[2], &s_ff[2], &s_fd_flag[2],
								  &s_fd[2], sim_cycle);
				  in2_flag = true;
				  break;
				}
			  }
			}
			// fetch the data that read register 3 depends on from the reorder buffer
			if ( rvs_inst->in3 != 0 ) {
			  for ( int a = 0; a < depend; a++ ) {
				if ( rvs_inst->in3 == opr[a] && d_rw[a] == 1 ) {
				  wb_buf.get_data(opr[a], label[a], &s1_flag[3], &s1[3],
								  &s2_flag[3], &s2[3], &s_i_flag[3], &s_i[3],
								  &s_ff_flag[3], &s_ff[3], &s_fd_flag[3],
								  &s_fd[3], sim_cycle);
				  in3_flag = true;
				  break;
				}
			  }
			}
		  }
		}

		if ( depend < 0 // no register dependence
			 || e_flag >= 1 ) {  // dependent data could be obtained from the reorder buffer
		  if ( rvs_inst->adr_flag ) { // memory access address computation
			// try to acquire a functional unit
			fu = resrc_simple.res_get(fu_pool, MD_OP_FUCLASS(MD_AGEN_OP));
			if (fu) { // functional unit acquired
			  if (fu->master->busy)
				panic("functional unit already in use");
			  issue_num++;
			  fu->master->busy = fu->issuelat;			  
			  rvs_inst->exec_ok = false;
			  order_buf.deletelist(rvs_inst->seq);
			  regs.regs_R[MD_REG_ZERO] = 0;
			  /* reset the members the _addr_impl handler below is expected
				 to fill in -- TODO confirm against the handlers */
			  addr = addr2 = 0;
			  addr_area = 0;
			  load_flag = use_lsq_flag = false;

#ifdef COUT_PIPELINE  
			  /* pipeline trace: print PC, mnemonic, sequence number and the
				 output ("or") / input ("ir") register names */
			  switch (op)
				{
#undef DEFINST
#define DEFINST(OP,MSK,NAME,OPFORM,RES,CLASS,O1,O2,O3,I1,I2,I3,I4)		\
	case OP:	\
      cout << "                  +ISSUE:" << hex << rvs_inst->now_PC << " " << NAME << " (" << dec << rvs_inst->seq << ") or";  \
      if (O1) cout << "[" << O1 << "]"; \
      if (O2) cout << "[" << O2 << "]"; \
      if (O3) cout << "[" << O3 << "]"; \
      if (!O1&&!O2&&!O3) cout << "=nil"; \
      cout << " ir"; \
      if (I1) cout << "[" << I1 << "]"; \
      if (I2) cout << "[" << I2 << "]"; \
      if (I3) cout << "[" << I3 << "]"; \
      if (I4) cout << "[" << I4 << "]"; \
      if (!I1&&!I2&&!I3&&!I4) cout << "=nil"; \
      cout << endl;  \
	  break;
#include "machine.def"
				default:
				  break;
				}
#endif

			  /* dispatch to the per-opcode address computation handler */
			  switch (op)
				{
#undef DEFINST
#define DEFINST(OP,MSK,NAME,OPFORM,RES,CLASS,O1,O2,O3,I1,I2,I3,I4)		\
	case OP:	\
      SYMCAT(OP,_addr_impl)(rvs_inst);  \
	  break;
#include "machine.def" 
				default:
				  break;
				}
			  
			  // pass the computed memory access address to the load/store that follows
			  /* NOTE(review): rvs_inst->next is dereferenced unchecked;
				 presumably it is always the paired memory op queued right
				 after this one by ruu_dispatch -- confirm */
			  rvs_inst->next->adr = addr;
			  rvs_inst->next->adr2 = addr2;			  
			  rvs_inst->next->calc_time = sim_cycle;
			  rvs_inst->next->adr_area = addr_area;
			  rvs_inst->next->load_flag = load_flag;
			  rvs_inst->next->use_lsq_flag = use_lsq_flag;
			  load_flag = false;

			  if (MD_OP_FLAGS(op) & F_MEM) {
				/* NOTE(review): when this branch fires, instq.deletelist()
				   is called here and again unconditionally below, and
				   rvs_inst->seq is read in between -- confirm deletelist
				   tolerates that */
				if ( rvs_inst->branch_result == 1 ) {
				  if ( addr < 0x10000000 )
					instq.deletelist(rvs_inst->seq);
				}
				addr_order_buf.updatelist(rvs_inst->seq+1,
										  addr, addr2, addr_area);
			  } else
				assert( addr == 0 );
			  instq.deletelist(rvs_inst->seq);
			}
		  } else if (((MD_OP_FLAGS(op) & (F_MEM|F_STORE)) // store instruction
					  == (F_MEM|F_STORE))
					 && (!rvs_inst->load_flag // does not load
						 || rvs_inst->load_flag // does load
						 && rvs_inst->load_finish)) { // memory data already obtained
			/* the store goes straight to exec_buf (completing next cycle)
			   and wb_buf; issue_num is not incremented on this path */
			rvs_inst->exec_ok = false;
			rvs_inst->exec_time = sim_cycle + 1;
			exec_buf.insertlist(rvs_inst);
			wb_buf.insertlist(rvs_inst, 1, 1, 0, 0, 0, 0, 0, 0,
							  0, 0, 0, 0, sim_cycle);

			// neither a store nor a memory access address computation
		  } else if (!((MD_OP_FLAGS(op)&(F_MEM|F_STORE)) == (F_MEM|F_STORE))
					 && !rvs_inst->adr_flag) {
			issue_flag = true; // OK to issue
			// a store, but one that loads data from memory
		  } else if (((MD_OP_FLAGS(op)&(F_MEM|F_STORE)) == (F_MEM|F_STORE))
					 && rvs_inst->load_flag)
			issue_flag = true; // OK to issue
		}
	  }
	}

	if ( issue_flag ) {	// OK to issue
	  if (MD_OP_FUCLASS(op) != NA) { // uses a functional unit
		
		// load or store instruction
		if (((MD_OP_FLAGS(op) & (F_MEM|F_LOAD)) == (F_MEM|F_LOAD))
			|| ((MD_OP_FLAGS(op) & (F_MEM|F_STORE)) == (F_MEM|F_STORE))) {
		  access_adr = rvs_inst->adr;
		  if ((access_adr >= SHARED_MEM_FOR_SYNC_TOP // ordinary shared-memory access
			   && access_adr <= SHARED_MEM_BOTTOM)
			  || ( access_adr == CALC_START // access that starts statistics collection
				   && rvs_inst->branch_result == 1 ) // branch resolved
			  || ( access_adr == CALC_END // access that ends statistics collection
				   && rvs_inst->branch_result == 1 ) // branch resolved
			  || (access_adr >= TAS_MEM_TOP  // Test-and-Set access
				  && access_adr <= TAS_MEM_BOTTOM
				  && rvs_inst->branch_result == 1 ) // branch resolved
			  || (access_adr >= FAD_MEM_TOP // Fetch-and-Dec access
				  && access_adr <= FAD_MEM_BOTTOM
				  && rvs_inst->branch_result == 1 )) { // branch resolved
			
			if ( access_adr == CALC_START ) // access that starts statistics collection
			  clock_flag = true; // statistics collection flag on
			if ( access_adr == CALC_END ) { // access that ends statistics collection
			  clock_flag = false; // statistics collection flag off
			  
#ifdef GET_PU_STATISTICS
			  // print the counters the moment the end-of-statistics access arrives
			  cout << "PU[" << puid << "] "
				   << dec << clock << " "
				   << pts_local_read_access << " "
				   << pts_shared_read_access << " "
				   << pts_sync_read_access << " "
				   << pts_local_write_access << " "
				   << pts_shared_write_access << " "
				   << pts_sync_write_access << " "
				   << pts_other_inst
				   << endl;
#endif
			}
			
			addr_area = rvs_inst->adr_area;
			req_label = rvs_inst->seq;
			/* cleared here and tested after the handler; presumably the
			   _set_req_impl handler sets it when the request cannot be
			   sent -- TODO confirm */
			exec_stop_flag = false;
			
#ifdef COUT_PIPELINE  
			/* pipeline trace, same format as above */
			switch (op)
			  {
#undef DEFINST
#define DEFINST(OP,MSK,NAME,OPFORM,RES,CLASS,O1,O2,O3,I1,I2,I3,I4)		\
	case OP:	\
      cout << "                   ISSUE:" << hex << rvs_inst->now_PC << " " << NAME << " (" << dec << rvs_inst->seq << ") or";   \
      if (O1) cout << "[" << O1 << "]"; \
      if (O2) cout << "[" << O2 << "]"; \
      if (O3) cout << "[" << O3 << "]"; \
      if (!O1&&!O2&&!O3) cout << "=nil"; \
      cout << " ir"; \
      if (I1) cout << "[" << I1 << "]"; \
      if (I2) cout << "[" << I2 << "]"; \
      if (I3) cout << "[" << I3 << "]"; \
      if (I4) cout << "[" << I4 << "]"; \
      if (!I1&&!I2&&!I3&&!I4) cout << "=nil"; \
      cout << endl;  \
	  break;
#include "machine.def"
			  default:
				break;
			  }
#endif
			/* dispatch to the per-opcode request set-up handler */
			switch (op)
			  {
#undef DEFINST
#define DEFINST(OP,MSK,NAME,OPFORM,RES,CLASS,O1,O2,O3,I1,I2,I3,I4)		\
	case OP:	\
      SYMCAT(OP,_set_req_impl)(rvs_inst);  \
	  break;
#include "machine.def" 
			  default:
				break;
			  }
			
			if ( !exec_stop_flag ) {// successfully issued to the external access port
			  
			  issue_num++;
			  // update statistics counters
			  /* NOTE(review): stores taking this path are also counted in
				 the *_read_access counters -- confirm this is intended */
			  if ( access_adr < SHARED_MEM_TOP )
				sync_read_access++;
			  else if ( access_adr >= SHARED_MEM_TOP )
				shared_read_access++;
			  if ( clock_flag ) {
				if ( access_adr < SHARED_MEM_TOP )
				  pts_sync_read_access++;
				else if ( access_adr >= SHARED_MEM_TOP )
				  pts_shared_read_access++;
			  }
			  
			  rvs_inst->exec_ok = false;
			  rvs_inst->exec_time = sim_cycle + 1;
			  exec_buf.insertlist(rvs_inst); 
			}
		  } else if (/* Test-and-Set or Fetch-and-Dec access, but the
					   branch has not been resolved yet */
					 ( access_adr >= TAS_MEM_TOP
					   && access_adr <= TAS_MEM_BOTTOM )
					 || ( access_adr >= FAD_MEM_TOP
						  && access_adr <= FAD_MEM_BOTTOM )) {			  
			//do nothing
		  } else { // local memory access
			// try to acquire a functional unit
			fu = resrc_simple.res_get(fu_pool, MD_OP_FUCLASS(op));
			if (fu) { // functional unit acquired
			  issue_num++;
			  if (fu->master->busy)
				panic("functional unit already in use");
			  fu->master->busy = fu->issuelat;
			  rvs_inst->fu= fu;
			  
#ifdef COUT_PIPELINE  
			  /* pipeline trace, same format as above */
			  switch (op)
				{
#undef DEFINST
#define DEFINST(OP,MSK,NAME,OPFORM,RES,CLASS,O1,O2,O3,I1,I2,I3,I4)		\
	case OP:	\
      cout << "                   ISSUE:" << hex << rvs_inst->now_PC << " " << NAME << " (" << dec << rvs_inst->seq << ") or";  \
      if (O1) cout << "[" << O1 << "]"; \
      if (O2) cout << "[" << O2 << "]"; \
      if (O3) cout << "[" << O3 << "]"; \
      if (!O1&&!O2&&!O3) cout << "=nil"; \
      cout << " ir"; \
      if (I1) cout << "[" << I1 << "]"; \
      if (I2) cout << "[" << I2 << "]"; \
      if (I3) cout << "[" << I3 << "]"; \
      if (I4) cout << "[" << I4 << "]"; \
      if (!I1&&!I2&&!I3&&!I4) cout << "=nil"; \
      cout << endl;  \
	  break;
#include "machine.def"
				default:
				  break;
				}
#endif
			  /* notify the cache of the access and take the access
				 latency from its return value; while statistics are being
				 collected with cache_flush_flag set, the separate
				 stat_cache / s_cache_* objects are used instead */
			  load_lat = cache_dl1_lat;
			  if ( cache_dl1 ) {
				if ( cache_dl2 ) {
				  if ( clock_flag && cache_flush_flag ) {
					load_lat
					  = stat_cache.cache_access(s_cache_dl1, Read,
												(access_adr & ~3), NULL, 4,
												sim_cycle, NULL, NULL, 0,
												s_cache_dl2,
												mem_bus_width,
												mem_lat[0], mem_lat[1]);
				  } else {
					load_lat
					  = cache.cache_access(cache_dl1, Read,
										   (access_adr & ~3), NULL, 4,
										   sim_cycle, NULL, NULL, 0,
										   cache_dl2,
										   mem_bus_width,
										   mem_lat[0], mem_lat[1]);
				  }
				} else {
				  if ( clock_flag && cache_flush_flag ) {
					load_lat
					  = stat_cache.cache_access(s_cache_dl1, Read,
												(access_adr & ~3), NULL, 4,
												sim_cycle, NULL, NULL, 0, 0,
												mem_bus_width,
												mem_lat[0], mem_lat[1]);
				  } else {
					load_lat
					  = cache.cache_access(cache_dl1, Read,
										   (access_adr & ~3), NULL, 4,
										   sim_cycle, NULL, NULL, 0, 0,
										   mem_bus_width,
										   mem_lat[0], mem_lat[1]);
				  }
				}
			  }
			  
			  /* the D-TLB is accessed as well; the longer of the two
				 latencies is used */
			  if ( dtlb ) {
				if ( clock_flag && cache_flush_flag ) {
				  tlb_lat
					= stat_cache.cache_access(s_dtlb, Read,
											  (access_adr & ~3),
											  NULL, 4, sim_cycle, NULL, NULL,
											  tlb_miss_lat, 0, mem_bus_width,
											  mem_lat[0], mem_lat[1]);
				} else {
				  tlb_lat
					= cache.cache_access(dtlb, Read, (access_adr & ~3),
										 NULL, 4, sim_cycle, NULL, NULL,
										 tlb_miss_lat, 0, mem_bus_width,
										 mem_lat[0], mem_lat[1]);
				}
				if (tlb_lat > load_lat)
				  load_lat = tlb_lat;
			  }
			  
			  // update statistics counters
			  local_read_access++;
			  if ( clock_flag )
				pts_local_read_access++;
			  
			  rvs_inst->exec_ok = false;			  
			  rvs_inst->exec_time = sim_cycle + load_lat;
			  exec_buf.insertlist(rvs_inst); 
			} else { // could not acquire a functional unit
			  //do nothing
			}
		  }
		} else { // !load and !store
		  
		  // try to acquire a functional unit
		  fu = resrc_simple.res_get(fu_pool, MD_OP_FUCLASS(op));
		  if (fu) { // functional unit acquired
			issue_num++;
			if (fu->master->busy)
			  panic("functional unit already in use");
			fu->master->busy = fu->issuelat;
			rvs_inst->fu= fu;			  
			
#ifdef COUT_PIPELINE
			/* pipeline trace, same format as above */
			switch (op)
			  {
#undef DEFINST
#define DEFINST(OP,MSK,NAME,OPFORM,RES,CLASS,O1,O2,O3,I1,I2,I3,I4)		\
	case OP:	\
      cout << "                   ISSUE:" << hex << rvs_inst->now_PC << " " << NAME << " (" << dec << rvs_inst->seq << ") or";  \
      if (O1) cout << "[" << O1 << "]"; \
      if (O2) cout << "[" << O2 << "]"; \
      if (O3) cout << "[" << O3 << "]"; \
      if (!O1&&!O2&&!O3) cout << "=nil"; \
      cout << " ir"; \
      if (I1) cout << "[" << I1 << "]"; \
      if (I2) cout << "[" << I2 << "]"; \
      if (I3) cout << "[" << I3 << "]"; \
      if (I4) cout << "[" << I4 << "]"; \
      if (!I1&&!I2&&!I3&&!I4) cout << "=nil"; \
      cout << endl;  \
	  break;
#include "machine.def" 
			  default:
				break;
			  }
#endif
			/* completes after the functional unit's operation latency */
			rvs_inst->exec_ok = false;
			rvs_inst->exec_time = sim_cycle + fu->oplat;
			exec_buf.insertlist(rvs_inst); 
			
			// update statistics counters
			other_inst++;
			if ( clock_flag )
			  pts_other_inst++;
		  }
		}
	  } else { // does not use a functional unit
#ifdef COUT_PIPELINE  
		/* pipeline trace, same format as above */
		switch (op)
		  {
#undef DEFINST
#define DEFINST(OP,MSK,NAME,OPFORM,RES,CLASS,O1,O2,O3,I1,I2,I3,I4)		\
	case OP:	\
      cout << "                   ISSUE:" << hex << rvs_inst->now_PC << " " << NAME << " (" << dec << rvs_inst->seq << ") or";  \
      if (O1) cout << "[" << O1 << "]"; \
      if (O2) cout << "[" << O2 << "]"; \
      if (O3) cout << "[" << O3 << "]"; \
      if (!O1&&!O2&&!O3) cout << "=nil"; \
      cout << " ir"; \
      if (I1) cout << "[" << I1 << "]"; \
      if (I2) cout << "[" << I2 << "]"; \
      if (I3) cout << "[" << I3 << "]"; \
      if (I4) cout << "[" << I4 << "]"; \
      if (!I1&&!I2&&!I3&&!I4) cout << "=nil"; \
      cout << endl;  \
	  break;
#include "machine.def" 
		  default:
			break;
		  }
#endif
		/* no functional unit needed: completes next cycle; issue_num is
		   not incremented on this path */
		rvs_inst->exec_ok = false;
		rvs_inst->exec_time = sim_cycle + 1;
		exec_buf.insertlist(rvs_inst); 

		// update statistics counters
		other_inst++;
		if ( clock_flag )
		  pts_other_inst++;
	  }
	} else { 
	  //do nothing
	}
  }
}

/*
 * ruu_dispatch() - decode/dispatch stage.
 *
 * Consumes instructions from the IFETCH -> DISPATCH queue (fetch_data) while
 * decode bandwidth remains, the queue is non-empty and neither the RUU nor
 * the LSQ is full.  Each non-NOP instruction gets a fresh sequence number,
 * has its source/destination registers recorded in order_buf and is inserted
 * into instq.  Memory instructions are split in two consecutive entries:
 * an address-computation entry (using in2/in3) followed by the memory access
 * entry (using in1/in4 and the outputs), which is also registered in
 * addr_order_buf.  Trap instructions wait until the RUU and LSQ are empty and
 * are then executed immediately through their _impl handler.
 */
void
isim_processor::ruu_dispatch(void)
{
  int n_dispatched;			/* total insts dispatched */
  enum md_opcode op;			/* decoded opcode enum */
  int out1, out2, out3, in1, in2, in3, in4;	/* output/input register names */
  /* effective address, if load/store */
  struct bpred_update_t *dir_update_ptr;/* branch predictor dir update ptr */
  int stack_recover_idx;		/* bpred retstack recovery index */
  int is_write;				/* store? */
  n_dispatched = 0;
  
  while (/* instruction decode B/W left? */
		 n_dispatched < (ruu_decode_width * fetch_speed)
		 /* insts still available from fetch unit? */
		 && fetch_num != 0 
		 && instq.get_ruu_num() < RUU_size /* RUU is not full */
		 && instq.get_lsq_num() < LSQ_size ) { /* LSQ is not full */

	/* get the next instruction from the IFETCH -> DISPATCH queue */
	inst = fetch_data[fetch_head].IR;
	regs.regs_PC = fetch_data[fetch_head].regs_PC;
	pred_PC = fetch_data[fetch_head].pred_PC;
	dir_update_ptr = &(fetch_data[fetch_head].dir_update);
	stack_recover_idx = fetch_data[fetch_head].stack_recover_idx;
	
	/* decode the inst */
	MD_SET_OPCODE(op, inst);
	
	/* compute default next PC */
	regs.regs_NPC = regs.regs_PC + sizeof(md_inst_t);

	/* maintain $r0 semantics (in spec and non-spec space) */
	regs.regs_R[MD_REG_ZERO] = 0; 
#ifdef TARGET_ALPHA
	regs.regs_F.d[MD_REG_ZERO] = 0.0; 
#endif /* TARGET_ALPHA */
	
	/* default effective address (none) and access */
	addr = 0; is_write = FALSE;
	
	/* set default fault - none */
	fault = md_fault_none;

	if ((MD_OP_FLAGS(op) & F_TRAP)){ 
	  
	  /* wait until both the RUU and the LSQ have drained; the instruction
		 stays at the head of the fetch queue and is retried next cycle */
	  if ( instq.get_ruu_num() > 0 || instq.get_lsq_num() > 0 )
		return;

	  /* one more non-speculative instruction executed; counted only now so
		 that a trap stalled on the drain above is not re-counted on every
		 retry */
	  sim_num_insn++;
	  
	  /* record register names and execute the trap right away */
	  switch (op)
		{
#undef DEFINST
#define DEFINST(OP,MSK,NAME,OPFORM,RES,CLASS,O1,O2,O3,I1,I2,I3,I4)		\
	case OP:					\
	  out1 = O1; out2 = O2;	out3 = O3;					\
	  in1 = I1; in2 = I2; in3 = I3;	in4 = I4;				\
	  SYMCAT(OP,_impl)();						\
	  break;
#include "machine.def"
		default:
		  /* unknown opcode: treat as a NOP */
		  op = MD_NOP_OP;
		  out1 = NA; out2 = NA; out3 = NA;
		  in1 = NA; in2 = NA; in3 = NA; in4 = NA;
		}
	} else {

	  /* one more non-speculative instruction executed */
	  sim_num_insn++;

#ifdef COUT_PIPELINE  
	  /* pipeline trace: PC, mnemonic and the sequence number about to be
		 assigned */
	  switch (op)
		{
#undef DEFINST
#define DEFINST(OP,MSK,NAME,OPFORM,RES,CLASS,O1,O2,O3,I1,I2,I3,I4)		\
	case OP:	\
      cout << " DISPCH:" << hex << regs.regs_PC << " " << NAME << " (" << dec << inst_seq+1 << ")" << endl;   \
      break;
#include "machine.def"
		default:
		  break;
		}
#endif

	  /* record register names only; execution happens in later stages */
 	  switch (op)
		{
#undef DEFINST
#define DEFINST(OP,MSK,NAME,OPFORM,RES,CLASS,O1,O2,O3,I1,I2,I3,I4)		\
	case OP:			\
	  out1 = O1; out2 = O2;	out3 = O3;					\
	  in1 = I1; in2 = I2; in3 = I3;	in4 = I4;				\
	  break;
#include "machine.def"
		default:
		  /* unknown opcode: treat as a NOP */
		  op = MD_NOP_OP;
		  out1 = NA; out2 = NA; out3 = NA;
		  in1 = NA; in2 = NA; in3 = NA; in4 = NA;
		}
	}

	if (fault != md_fault_none)
	  fatal("non-speculative fault (%d) detected @ 0x%08p",
			fault, regs.regs_PC);
	
	/* update memory access stats */
	if (MD_OP_FLAGS(op) & F_MEM) {
	  sim_total_refs++;
	  if ( clock_flag )
		pts_sim_total_refs++;
	  
	  if (MD_OP_FLAGS(op) & F_STORE)
		is_write = TRUE;
	  else {
		sim_total_loads++;
		if ( clock_flag )		
		  pts_sim_total_loads++;
	  }
	}

	/* is this a NOP */
	if (op != MD_NOP_OP) {

	  ++inst_seq;

	  if ( !(MD_OP_FLAGS(op) & F_TRAP)) {
		if ( !(MD_OP_FLAGS(op) & F_MEM) ) { /* instruction without memory access */

		  /* register the registers this instruction reads (0) / writes (1) */
		  if ( in1 != 0) order_buf.insertlist(inst_seq, in1, 0);
		  if ( in2 != 0) order_buf.insertlist(inst_seq, in2, 0);
		  if ( in3 != 0) order_buf.insertlist(inst_seq, in3, 0);
		  if ( in4 != 0) order_buf.insertlist(inst_seq, in4, 0);
		  if ( out1 != 0) order_buf.insertlist(inst_seq,out1, 1);
		  if ( out2 != 0) order_buf.insertlist(inst_seq,out2, 1);
		  if ( out3 != 0) order_buf.insertlist(inst_seq,out3, 1);

		  /* insert the instruction's information */
		  instq.insert(fetch_data[fetch_head].IR, inst_seq,
					   false, /* not an address-computation entry */
					   prev_branch_PC, regs.regs_PC,
					   regs.regs_NPC, pred_PC, dir_update_ptr,
					   in1, in2, in3, in4, out1, out2, out3,
					   0, 0, branch_seq);
		} else { /* instruction with memory access */

		  /* register the registers used to compute the memory address */
		  if ( in2 != 0 ) order_buf.insertlist(inst_seq, in2, 0);
		  if ( in3 != 0 ) order_buf.insertlist(inst_seq, in3, 0);

		  /* insert the instruction's information */
		  instq.insert(fetch_data[fetch_head].IR, inst_seq,
					   true, /* this is the address-computation entry */
					   prev_branch_PC, regs.regs_PC,
					   regs.regs_NPC, pred_PC, dir_update_ptr,
					   0, in2, in3, 0, 0, 0, 0, 0, 0, branch_seq);
		}
	  }

	  /* split ld/st's into two operations: eff addr comp + mem access */
	  if (MD_OP_FLAGS(op) & F_MEM) { /* memory access instruction */

		++inst_seq;
		
		/* register the registers used for anything other than the
		   address computation */
		if ( in1 != 0 ) order_buf.insertlist(inst_seq, in1, 0);
		if ( in4 != 0 ) order_buf.insertlist(inst_seq, in4, 0);
		if ( out1 != 0 ) order_buf.insertlist(inst_seq, out1, 1);
		if ( out2 != 0 ) order_buf.insertlist(inst_seq, out2, 1);
		if ( out3 != 0 ) order_buf.insertlist(inst_seq, out3, 1);

		/* ls_flag: 1 for a store, 0 for a load */
		bool ls_flag = 0;		
		if((MD_OP_FLAGS(op) & (F_MEM|F_STORE)) == (F_MEM|F_STORE))
		  ls_flag = 1;

		/* insert the instruction's information */
		instq.insert(fetch_data[fetch_head].IR, inst_seq,
					 false, /* not an address-computation entry */
					 prev_branch_PC, regs.regs_PC,
					 regs.regs_NPC, pred_PC, NULL,
					 in1, 0, 0, in4, out1, out2, out3,
					 ls_flag, 1, branch_seq);

		addr_order_buf.insertlist(inst_seq, ls_flag);

		/* install operation in the RUU and LSQ */
		n_dispatched++;
	  } else /* !(MD_OP_FLAGS(op) & F_MEM) */ {

		/* install operation in the RUU */
		n_dispatched++;
	  }
	}

	/* remember the most recent control instruction for later entries */
	if ( MD_OP_FLAGS(op) & F_CTRL){
	  branch_seq = inst_seq;
	  prev_branch_PC = regs.regs_PC;
	}

	/* one more instruction executed, speculative or otherwise */
	sim_total_insn++;
	if ( clock_flag )
	  pts_sim_total_insn++;

	if (MD_OP_FLAGS(op) & F_CTRL) {
	  sim_total_branches++;
	  if ( clock_flag )
		pts_sim_total_branches++;
	}

	/* consume instruction from IFETCH -> DISPATCH queue; the mask assumes
	   ruu_ifq_size is a power of two -- TODO confirm it is validated */
	fetch_head = (fetch_head+1) & (ruu_ifq_size - 1);
	fetch_num--;
  }
}

/*
 *  RUU_FETCH() - instruction fetch pipeline stage(s)
 */
/* set up the instruction fetch stage: allocate the IFETCH -> DISPATCH
   queue, mark it empty and clear its occupancy counters */
void
isim_processor::fetch_init()
{
  /* zero-filled storage for ruu_ifq_size fetch records */
  fetch_data =
    static_cast<struct fetch_rec *>(calloc(ruu_ifq_size,
                                           sizeof(struct fetch_rec)));
  if (fetch_data == NULL)
    fatal("out of virtual memory");

  /* the queue starts out empty: head and tail coincide */
  fetch_head = 0;
  fetch_tail = 0;
  fetch_num = 0;

  /* occupancy and queue-full counters, plus their pts_ counterparts */
  IFQ_count = 0;
  pts_IFQ_count = 0;
  IFQ_fcount = 0;
  pts_IFQ_fcount = 0;
}

/* fetch up as many instruction as one branch prediction and one cache line
   access will support without overflowing the IFETCH -> DISPATCH QUEUE.

   Each iteration fetches the instruction at fetch_pred_PC, charges the
   I-cache / I-TLB access, asks the branch predictor (if any) for the next
   fetch address and appends the instruction to fetch_data.  A cache or TLB
   miss stops fetching and stalls the stage through ruu_fetch_issue_delay;
   the missing instruction is not queued and is fetched again later. */
void
isim_processor::ruu_fetch(void)
{
  int i, lat, tlb_lat, done = FALSE;
  /* initialised because it is stored into the fetch queue for every
     instruction, while bpred_lookup() only fills it in for control
     instructions when a predictor is configured */
  int stack_recover_idx = 0;
  int branch_cnt;

  /* cycle counter for the statistics window */
  if ( clock_flag ) clock++;

  for (i=0, branch_cnt=0;
       /* fetch up to as many instruction as the DISPATCH stage can decode */
       i < (ruu_decode_width * fetch_speed)
		 /* fetch until IFETCH -> DISPATCH queue fills */
		 && fetch_num < ruu_ifq_size
		 /* and no IFETCH blocking condition encountered */
		 && !done; i++) {
	
	/* fetch an instruction at the next predicted fetch address */
	fetch_regs_PC = fetch_pred_PC;

	/* is this a bogus text address? (can happen on mis-spec path) */
	if (load_simple.ld_text_base <= fetch_regs_PC
		&& fetch_regs_PC < (load_simple.ld_text_base
							+load_simple.ld_text_size)
		&& !(fetch_regs_PC & (sizeof(md_inst_t)-1))) {

	  md_fetch_inst(fetch_regs_PC);
	  
	  /* address is within program text, read instruction from memory */
	  lat = cache_il1_lat;

	  /* while statistics are being collected with cache_flush_flag set, the
		 separate stat_cache / s_cache_* objects are accessed instead */
	  if (cache_il1) {
		if (cache_il2) {
		  if ( clock_flag && cache_flush_flag ) {
			lat
			  = stat_cache.cache_access(s_cache_il1, Read, 
										iacompress(fetch_regs_PC),
										NULL, iscompress(sizeof(md_inst_t)), 
										sim_cycle, NULL, NULL, 0, s_cache_il2,
										mem_bus_width,
										mem_lat[0], mem_lat[1]);
		  } else {
			lat
			  = cache.cache_access(cache_il1, Read, 
								   iacompress(fetch_regs_PC),
								   NULL, iscompress(sizeof(md_inst_t)), 
								   sim_cycle, NULL, NULL, 0, cache_il2,
								   mem_bus_width,
								   mem_lat[0], mem_lat[1]);
		  }
		} else {
		  if ( clock_flag && cache_flush_flag ) {
			lat
			  = stat_cache.cache_access(s_cache_il1, Read, 
										iacompress(fetch_regs_PC),
										NULL, iscompress(sizeof(md_inst_t)), 
										sim_cycle, NULL, NULL, 0, 0, 
										mem_bus_width,
										mem_lat[0], mem_lat[1]);
		  } else {
			lat
			  = cache.cache_access(cache_il1, Read, 
								   iacompress(fetch_regs_PC),
								   NULL, iscompress(sizeof(md_inst_t)), 
								   sim_cycle, NULL, NULL, 0, 0, 
								   mem_bus_width,
								   mem_lat[0], mem_lat[1]);
		  }
		}
	  }
	  
	  if (itlb) {
		/* access the I-TLB, NOTE: this code will initiate
		   speculative TLB misses */
		if ( clock_flag && cache_flush_flag ) {
		  tlb_lat =
			stat_cache.cache_access(s_itlb, Read, iacompress(fetch_regs_PC),
									NULL, iscompress(sizeof(md_inst_t)), 
									sim_cycle, NULL, NULL, tlb_miss_lat, 0,
									mem_bus_width, mem_lat[0], mem_lat[1]);
		} else {
		  tlb_lat =
			cache.cache_access(itlb, Read, iacompress(fetch_regs_PC),
							   NULL, iscompress(sizeof(md_inst_t)), 
							   sim_cycle, NULL, NULL, tlb_miss_lat, 0,
							   mem_bus_width, mem_lat[0], mem_lat[1]);
		}
		
		/* I-cache/I-TLB accesses occur in parallel */
		if ( tlb_lat >= lat )
		  lat = tlb_lat;
	  }
	  
	  /* I-cache/I-TLB miss? assumes I-cache hit >= I-TLB hit */
	  if (lat != cache_il1_lat) {
		/* I-cache miss, block fetch until it is resolved */
		ruu_fetch_issue_delay += lat - 1;
		break;
	  }
	  
	  /* else, I-cache/I-TLB hit */
	} else {
	  /* fetch PC is bogus, send a NOP down the pipeline */
	  inst = MD_NOP_INST;
	}
	
	/* possibly use the BTB target */
	if (pred) {
	  enum md_opcode op;
	  
	  /* pre-decode instruction, used for bpred stats recording */
	  MD_SET_OPCODE(op, inst);
	  
	  /* get the next predicted fetch address; only use branch predictor
		 result for branches (assumes pre-decode bits); NOTE: returned
		 value may be 1 if bpred can only predict a direction */	
	  if (MD_OP_FLAGS(op) & F_CTRL)
		fetch_pred_PC
		  = bpred.bpred_lookup(pred,
							   /* branch address */fetch_regs_PC,
							   /* target address *//* FIXME: not computed */0,
							   /* opcode */op,
							   /* call? */MD_IS_CALL(op),
							   /* return? */MD_IS_RETURN(op),
							   /* updt */&(fetch_data[fetch_tail].dir_update),
							   /* RSB index */&stack_recover_idx);
	  else
		fetch_pred_PC = 0;
	  
	  /* valid address returned from branch predictor? */
	  if (!fetch_pred_PC) {
		/* no predicted taken target, attempt not taken target */
		fetch_pred_PC = fetch_regs_PC + sizeof(md_inst_t);
	  } else {
		/* go with target, NOTE: discontinuous fetch, so terminate */
		branch_cnt++;
		if (branch_cnt >= fetch_speed)
		  done = TRUE;
	  }
	} else {
	  /* no predictor, just default to predict not taken, and
		 continue fetching instructions linearly */
	  fetch_pred_PC = fetch_regs_PC + sizeof(md_inst_t);
	}

	/* commit this instruction to the IFETCH -> DISPATCH queue */
	fetch_data[fetch_tail].IR = inst;
	fetch_data[fetch_tail].regs_PC = fetch_regs_PC;
	fetch_data[fetch_tail].pred_PC = fetch_pred_PC;
	fetch_data[fetch_tail].stack_recover_idx = stack_recover_idx;
	
	/* adjust instruction fetch queue; the mask assumes ruu_ifq_size is a
	   power of two -- TODO confirm it is validated */
	fetch_tail = (fetch_tail + 1) & (ruu_ifq_size - 1);
	fetch_num++;
  }
}

/* clock_out() - runs the back-end half of one simulated cycle.  The stages
   are invoked from the end of the pipeline toward the front (commit, then
   writeback, then issue), after ruu_release_fu() has been called.
   NOTE(review): presumably ruu_release_fu() ages the functional-unit busy
   counters set by ruu_issue() -- confirm against its definition. */
void
isim_processor::clock_out(void)
{
  ruu_release_fu();
  ruu_commit();
  ruu_writeback();
  ruu_issue();
}

/* clock_in() - runs the front-end half of one simulated cycle: dispatch,
   then fetch (or one cycle of a pending fetch stall), then get_data(), and
   finally samples the IFQ / RUU / LSQ occupancy statistics. */
void
isim_processor::clock_in(void)
{
  ruu_dispatch();

  /* a pending fetch stall consumes this cycle instead of fetching */
  if (ruu_fetch_issue_delay)
	ruu_fetch_issue_delay--;
  else
	ruu_fetch();

  get_data();

  /* whole-run occupancy sums and "queue was full" cycle counts */
  IFQ_count += fetch_num;
  if (fetch_num == ruu_ifq_size)
	IFQ_fcount += 1;
  RUU_count += instq.get_ruu_num();
  if (instq.get_ruu_num() == RUU_size)
	RUU_fcount += 1;
  LSQ_count += instq.get_lsq_num();
  if (instq.get_lsq_num() == LSQ_size)
	LSQ_fcount += 1;

  /* nothing more to do unless the statistics window is open */
  if ( !clock_flag )
	return;

  /* same samples, accumulated only while clock_flag is set */
  pts_IFQ_count += fetch_num;
  if (fetch_num == ruu_ifq_size)
	pts_IFQ_fcount += 1;
  pts_RUU_count += instq.get_ruu_num();
  if (instq.get_ruu_num() == RUU_size)
	pts_RUU_fcount += 1;
  pts_LSQ_count += instq.get_lsq_num();
  if (instq.get_lsq_num() == LSQ_size)
	pts_LSQ_fcount += 1;
}

