/src/wattch/power.cc
C++ | 2206 lines | 950 code | 316 blank | 940 comment | 40 complexity | 8726359bcbeabf7b0ab452c414ed8ef1 MD5 | raw file
Possible License(s): BSD-3-Clause, LGPL-2.1
- /* I included this copyright since we're using Cacti for some stuff */
- /*------------------------------------------------------------
- * Copyright 1994 Digital Equipment Corporation and Steve Wilton
- * All Rights Reserved
- *
- * Permission to use, copy, and modify this software and its documentation is
- * hereby granted only under the following terms and conditions. Both the
- * above copyright notice and this permission notice must appear in all copies
- * of the software, derivative works or modified versions, and any portions
- * thereof, and both notices must appear in supporting documentation.
- *
- * Users of this software agree to the terms and conditions set forth herein,
- * and hereby grant back to Digital a non-exclusive, unrestricted, royalty-
- * free right and license under any changes, enhancements or extensions
- * made to the core functions of the software, including but not limited to
- * those affording compatibility with other hardware or software
- * environments, but excluding applications which incorporate this software.
- * Users further agree to use their best efforts to return to Digital any
- * such changes, enhancements or extensions that they make and inform Digital
- * of noteworthy uses of this software. Correspondence should be provided
- * to Digital at:
- *
- * Director of Licensing
- * Western Research Laboratory
- * Digital Equipment Corporation
- * 100 Hamilton Avenue
- * Palo Alto, California 94301
- *
- * This software may be distributed (but not offered for sale or transferred
- * for compensation) to third parties, provided such third parties agree to
- * abide by the terms and conditions of this notice.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND DIGITAL EQUIPMENT CORP. DISCLAIMS ALL
- * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL DIGITAL EQUIPMENT
- * CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
- * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
- * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
- * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
- * SOFTWARE.
- *------------------------------------------------------------*/
- #include <cassert>
- #include <cmath>
- #include <cstdio>
- #include <cstdlib>
- #include <string>
- #include "power.hh"
- using namespace std;
- /*
- #include "RC.h"
- #include "bpred.h"
- #include "cache.h"
- #include "dtm.h"
- #include "hotspot.h"
- #include "machine.h"
- #include "modes-opts.h" // lym
- #include "sim.h"
- */
// TODO: add real number
extern long long Mhz;
#define verbose false
/*
 * Factors that convert a switched capacitance into power; Powerfactor is
 * f * V^2 (Mhz * Vdd * Vdd), the others are variants of that product for
 * sense amps and low-swing lines.
 *
 * Every expansion is wrapped in parentheses so that the macro acts as a
 * single operand wherever it appears (as a divisor, next to + or -, ...).
 * NOTE: in `x * Powerfactor` the factor is now evaluated first and then
 * multiplied in, instead of being chained left to right, so results may
 * differ in the last floating-point bit from the unparenthesised form.
 */
#define SensePowerfactor ((Mhz)*((Vdd)/2)*((Vdd)/2))
#define Sense2Powerfactor ((Mhz)*(2*.3+.1*(Vdd)))
#define Powerfactor ((Mhz)*(Vdd)*(Vdd))
#define LowSwingPowerfactor ((Mhz)*.2*.2)
/* set scale for crossover (vdd->gnd) currents */
const double crossover_scaling = 1.2;
/* set non-ideal turnoff percentage */
const double turnoff_factor = 0.1;
/* freq/voltage scaling */
const double frequency_scaling = 1.0;
const double voltage_scaling = 1.0;
#define MSCALE (LSCALE * .624 / .2250)
int thermal_warmup_done = 0;
- /*----------------------------------------------------------------------*/
- /* static power model results */
- power_result_type power;
/*
 * pow2 -- integer power of two.
 *
 * Returns 2^x computed with integer arithmetic only.
 *   x < 0            -> 0 (2^x is a fraction, which truncates to 0)
 *   x too large      -> the largest positive int (saturates rather than
 *                       overflowing)
 */
int pow2(int x) {
  /* highest exponent whose power of two still fits in a signed int */
  const int max_exp = (int)(sizeof(int) * 8) - 2;
  if (x < 0)
    return 0;
  if (x > max_exp)
    return (int)(~0u >> 1);
  return 1 << x;
}
/*
 * logfour -- logarithm of x to base 4, computed as log(x)/log(4).
 *
 * A non-positive argument is echoed to stderr (as a bare "%e" line) before
 * the computation; the function still returns whatever log() produces for it.
 */
double logfour(double x)
{
  if (x <= 0) {
    fprintf(stderr,"%e\n",x);
  }
  const double ln_x = log(x);
  const double ln_four = log(4.0);
  return (double)(ln_x / ln_four);
}
- /* safer pop count to validate the fast algorithm */
- int pop_count_slow(qword_t bits)
- {
- int count = 0;
- qword_t tmpbits = bits;
- while (tmpbits) {
- if (tmpbits & 1) ++count;
- tmpbits >>= 1;
- }
- return count;
- }
- /* fast pop count */
- int pop_count(qword_t bits)
- {
- #define T unsigned long long
- #define ONES ((T)(-1))
- #define TWO(k) ((T)1 << (k))
- #define CYCL(k) (ONES/(1 + (TWO(TWO(k)))))
- #define BSUM(x,k) ((x)+=(x) >> TWO(k), (x) &= CYCL(k))
- qword_t x = bits;
- x = (x & CYCL(0)) + ((x>>TWO(0)) & CYCL(0));
- x = (x & CYCL(1)) + ((x>>TWO(1)) & CYCL(1));
- BSUM(x,2);
- BSUM(x,3);
- BSUM(x,4);
- BSUM(x,5);
- return x;
- }
- const int opcode_length = 8; /* presumably the opcode width in bits -- TODO confirm */
- const int inst_length = 32; /* presumably the instruction width in bits -- TODO confirm */
- int nvreg_width; /* not assigned in this part of the file */
- int npreg_width; /* not assigned in this part of the file */
- extern int bimod_config[]; /* declared only; defined in another translation unit */
- extern struct cache_t *cache_dl1; /* cache/TLB handles below are likewise defined elsewhere */
- extern struct cache_t *cache_il1;
- extern struct cache_t *cache_dl2;
- extern struct cache_t *cache_il2;
- extern struct cache_t *dtlb;
- extern struct cache_t *itlb;
- /* 2-level predictor config (<l1size> <l2size> <hist_size> <xor>) */
- extern int twolev_config[];
- /* combining predictor config (<meta_table_size>) */
- extern int comb_config[];
- /* return address stack (RAS) size */
- extern int ras_size;
- /* BTB predictor config (<num_sets> <associativity>) */
- extern int btb_config[];
- double global_clockcap; /* running capacitance sum; array_bitline_power() adds precharge gate cap to it */
- /*
- static double rename_power=0;
- static double bpred_power=0;
- static double window_power=0;
- static double lsq_power=0;
- static double regfile_power=0;
- //static double icache_power=0;
- //static double dcache_power=0;
- //static double dcache2_power=0;
- static double alu_power=0;
- static double falu_power=0;
- static double resultbus_power=0;
- static double clock_power=0;
- static double rename_power_cc1=0;
- static double bpred_power_cc1=0;
- //static double window_power_cc1=0;
- static double iq_power_cc1=0;
- static double fpq_power_cc1=0;
- static double lsq_power_cc1=0;
- static double regfile_power_cc1=0;
- static double icache_power_cc1=0;
- static double dcache_power_cc1=0;
- //static double dcache2_power_cc1=0;
- static double alu_power_cc1=0;
- static double resultbus_power_cc1=0;
- static double clock_power_cc1=0;
- static double rename_power_cc2=0;
- static double bpred_power_cc2=0;
- //static double window_power_cc2=0;
- static double iq_power_cc2=0;
- static double fpq_power_cc2=0;
- static double lsq_power_cc2=0;
- static double regfile_power_cc2=0;
- static double icache_power_cc2=0;
- static double dcache_power_cc2=0;
- //static double dcache2_power_cc2=0;
- static double alu_power_cc2=0;
- static double resultbus_power_cc2=0;
- static double clock_power_cc2=0;
- */
- /*
- static double rename_power_cc3=0;
- static double bpred_power_cc3=0;
- //static double window_power_cc3=0;
- static double iq_power_cc3=0;
- static double fpq_power_cc3=0;
- static double lsq_power_cc3=0;
- static double icache_power_cc3=0;
- static double dcache_power_cc3=0;
- static double dcache2_power_cc3=0;
- static double regfile_power_cc3=0;
- static double alu_power_cc3=0;
- //static double int_alu_power_cc3=0;
- //static double fp_alu_power_cc3=0;
- static double resultbus_power_cc3=0;
- static double clock_power_cc3=0;
- static double total_cycle_power;
- */
- double cycle_rename_power_cc3=0; /* cycle_*_power_cc3: one externally visible, zero-initialised value per unit; presumably refreshed every cycle elsewhere -- TODO confirm */
- double cycle_bpred_power_cc3=0;
- double cycle_bpreddir_power_cc3=0;
- double cycle_window_power_cc3=0;
- double cycle_lsq_power_cc3=0;
- double cycle_regfile_power_cc3=0;
- double cycle_icache_power_cc3=0;
- double cycle_dcache_power_cc3=0;
- double cycle_dcache2_power_cc3=0;
- double cycle_alu_power_cc3=0;
- double cycle_falu_power_cc3=0;
- double cycle_resultbus_power_cc3=0;
- double cycle_clock_power_cc3=0;
- static counter_t total_rename_access=0; /* total_*_access: file-local counters, one per unit, starting at zero */
- static counter_t total_bpred_access=0;
- static counter_t total_window_access=0;
- static counter_t total_lsq_access=0;
- static counter_t total_iq_access = 0;
- static counter_t total_fpq_access = 0;
- static counter_t total_regfile_access=0;
- static counter_t total_icache_access=0;
- static counter_t total_dcache_access=0;
- static counter_t total_dcache2_access=0;
- static counter_t total_alu_access=0;
- static counter_t total_resultbus_access=0;
- static counter_t max_rename_access; /* max_*_access: no initialiser, so these statics also start at zero */
- static counter_t max_bpred_access;
- static counter_t max_window_access;
- static counter_t max_lsq_access;
- static counter_t max_regfile_access;
- static counter_t max_icache_access;
- static counter_t max_dcache_access;
- static counter_t max_dcache2_access;
- static counter_t max_alu_access;
- static counter_t max_resultbus_access;
- static counter_t zc_cycles; /* presumably counts cycles with zero activity -- TODO confirm */
- int reg_counter = 0; /* the remaining globals have external linkage and are not used in this part of the file */
- int int_window_counter = 0;
- int fp_window_counter = 0;
- double fp_reg_power = 0;
- double rp_rename_power = 0;
- counter_t shadow_sim_cycle = 0;
- counter_t shadow_sim_num_insn = 0;
- /* MCREG Power modeling */
- // static double ireg2_access_power, ireg2_address_power;
- static double ireg2_power_per_reg, ireg2_address_power_per_reg;
- /*
- static double total_cycle_power_cc3=0.0;
- static double total_power_pipeline=0.0;
- static double total_power_cache=0.0;
- static double total_power_slave_l1cache=0.0;
- static double total_power_master_l1cache=0.0;
- static double total_power_l2cache=0.0;
- static double total_leakage_l1cache=0.0;
- static double total_leakage_l2cache=0.0;
- static double total_power_stream=0.0;
- static double total_leakage_stream=0.0;
- static double total_power_spill=0.0;
- static double total_power_interconn=0.0;
- static double total_power_bus=0.0;
- static double total_power_balancer=0.0;
- double total_power_mem = 0.0;
- double total_power_memspill = 0.0;
- static double cycle_leakage_pipeline=0.0;
- static double cycle_leakage_l1cache=0.0;
- static double cycle_leakage_l2cache=0.0;
- static double cycle_leakage_stream=0.0;
- static double cycle_leakage_spill = 0.0;
- static double cycle_leakage_interconn=0.0;
- //double strmbuf_idle_leakage=0.0, cache_idle_leakage=0.0, processor_idle_leakage=0.0, spillbuf_idle_leakage=0.0;
- double strmbuf_power_R[4], strmbuf_power_W[4], spillbuf_power_R[4], spillbuf_power_W[4];
- double L1cache_power_R, L1cache_power_W, L2cache_power_R, L2cache_power_W;
- double strmbuf_leakage=100.0, spillbuf_leakage=100.0, l1cache_leakage=100.0, l2cache_leakage=100.0;
- double balancer_power_R;
- double balancer_power_W;
- double balancer_leakage;
- extern double packet_power_read;
- extern double packet_power_write;
- extern double link_power_bit;
- extern int nStrmbuf;
- extern int nSpillbuf;
- extern int nProcs;
- extern int nL1cache;
- extern int nL2cache;
- extern double mem_power;
- double SB_area=0.0;
- double L1_area=0.0, L2_area=0.0;
- double balancer_area=0.0;
- */
- double
- compute_ireg2_access_power(double wirelength)
- {
- /* wirelength argument is in mm */
- double Cwire, Pwire;
- /* The power dissipated in wires is:
- * Pwire = 0.5 * Cwire * V * V * f
- * Cwire = capacitance of wire = Cmetal (F/um) * wirelength (mm) */
- Cwire = Cmetal * wirelength * 1000;
- Pwire = 0.5 * Cwire * Powerfactor;
- return Pwire;
- }
- /* compute bitline activity factors which we use to scale bitline power
- Here it is very important whether we assume 0's or 1's are
- responsible for dissipating power in pre-charged stuctures. (since
- most of the bits are 0's, we assume the design is power-efficient
- enough to allow 0's to _not_ discharge
- */
- double compute_af(counter_t num_pop_count_cycle,counter_t total_pop_count_cycle,int pop_width) {
- double avg_pop_count;
- double af,af_b;
- if(num_pop_count_cycle)
- avg_pop_count = (double)total_pop_count_cycle / (double)num_pop_count_cycle;
- else
- avg_pop_count = 0;
- af = avg_pop_count / (double)pop_width;
- af_b = 1.0 - af;
- /* printf("af == %f%%, af_b == %f%%, total_pop == %d, num_pop == %d\n",100*af,100*af_b,total_pop_count_cycle,num_pop_count_cycle); */
- return(af_b);
- }
- /*
- void write_power_stats(char* output)
- {
- FILE* fp=NULL;
- if(output){
- fp = fopen(output, "a");
- }
- if(fp==NULL)
- fp = stdout;
- total_power_cache = total_power_slave_l1cache \
- + total_power_master_l1cache \
- +total_power_l2cache \
- +total_leakage_l1cache \
- +total_leakage_l2cache;
- double total_chip_power = total_power_cache
- +total_power_interconn
- +total_power_bus
- +total_power_pipeline
- +total_power_stream
- +total_leakage_stream
- +total_power_spill
- +total_power_balancer;
- double total_power = total_chip_power \
- +total_power_mem+total_power_memspill;
- fprintf(fp, "\t");
- string delimiter = " \t";
- string description;
- string format;
- //=== cycles ===
- format = " \t%Lu";
- unsigned long long execCycles;
- if (flag_RecordStats)
- execCycles = (unsigned long long)(recordedTick+curTick-recordStartTick)/CLOCK_TICKS;
- else
- execCycles = (unsigned long long)(recordedTick)/CLOCK_TICKS;
- printNameFormat("cycles", execCycles);
- printNameFormat("tick", (unsigned long long) curTick/CLOCK_TICKS);
- //=== counters ===
- format=" \t%Ld";
- printNameFormat("#strmbuf_read", total_strmbuf_read);
- printNameFormat("#strm_cpu_readmiss", total_cpu_read_stream_misses);
- printNameFormat("#strm_resp_net", total_strmbuf_read_net);
- printNameFormat("#strm_fwdnet_latefills", total_fwd_net_latefills);
- printNameFormat("#strm_resp_spill", total_strmbuf_read_spill);
- printNameFormat("#strm_fwdspill_latefills", total_fwd_spill_latefills);
- printNameFormat("#strmbuf_write", total_strmbuf_write);
- printNameFormat("#strmbuf_spills", total_strmbuf_writespill);
- printNameFormat("#strmbuf_spills_by_fwd", total_strmbuf_fwdspill);
- printNameFormat("#fwd_from_net", total_fwd_net);
- printNameFormat("#fwd_from_spill", total_fwd_spill);
- printNameFormat("#fwd_from_mem", total_fwd_mem);
- printNameFormat("#packets", total_packets);
- //printNameFormat("#spills", total_spills);
- printNameFormat("#spill_writes", total_spill_writes);
- printNameFormat("#spill_reads", total_spill_reads);
- printNameFormat("#spill_readmisses", total_spill_readmisses);
- printNameFormat("#spill_mem", total_mem_spills);
- printNameFormat("#mem_strm_reads", total_mem_strmreads);
- printNameFormat("#mem_strm_writes", total_mem_strmwrites);
- printNameFormat("#max_spill_memlines", max_mem_spilledlines);
- printNameFormat("#L1slave_I$_Wr", total_l1slave_Iwrite);
- printNameFormat("#L1slave_I$_Wmis", total_l1slave_Wr_Imisses);
- printNameFormat("#L1slave_I$_Rd", total_l1slave_Iread);
- printNameFormat("#L1slave_I$_Rmis", total_l1slave_Rd_Imisses);
- printNameFormat("#L1slave_D$_Wr", total_l1slave_Dwrite);
- printNameFormat("#L1slave_D$_Wmis", total_l1slave_Wr_Dmisses);
- printNameFormat("#L1slave_D$_Rd", total_l1slave_Dread);
- printNameFormat("#L1slave_D$_Rmis", total_l1slave_Rd_Dmisses);
- printNameFormat("#L1master_I$_Wr", total_l1master_Iwrite);
- printNameFormat("#L1master_I$_Wmis", total_l1master_Wr_Imisses);
- printNameFormat("#L1master_I$_Rd", total_l1master_Iread);
- printNameFormat("#L1master_I$_Rmis", total_l1master_Rd_Imisses);
- printNameFormat("#L1master_D$_Wr", total_l1master_Dwrite);
- printNameFormat("#L1master_D$_Wmis", total_l1master_Wr_Dmisses);
- printNameFormat("#L1master_D$_Rd", total_l1master_Dread);
- printNameFormat("#L1master_D$_Rmis", total_l1master_Rd_Dmisses);
- printNameFormat("#L2_Rd", total_l2cache_read);
- printNameFormat("#L2_Rdmis", total_l2_Rd_misses);
- printNameFormat("#L2_RdEx", total_l2cache_readEx);
- printNameFormat("#L2_RdExmis", total_l2_RdEx_misses);
- printNameFormat("#L2_Wr", total_l2cache_write);
- printNameFormat("#L2_Wrmis", total_l2_Wr_misses);
- printNameFormat("#ins_slave_load", total_slave_memread_ins);
- printNameFormat("#ins_slave_store", total_slave_memwrite_ins);
- printNameFormat("#ins_xflow_read", total_slave_flowread_ins);
- printNameFormat("#ins_xflow_write", total_slave_flowwrite_ins);
- // === cpu status fractions ===
- if(execCycles){
- format="\t%g";
- for (int n=0; n<nProcs; n++){
- double total_fraction=0.0;
- for(int s=0; s<NumCpuStatus; s++){
- Tick cycles = cpusStatusPeriod[s][n]/CLOCK_TICKS;
- //if(cycles==0)
- //continue;
- double fraction = (double)cycles/(double)execCycles;
- total_fraction += fraction;
- fprintf(fp, "cpu%d_%s\t%g\t", n, cpusStatusNames[s].c_str(), fraction);
- }
- assert(total_fraction>0.99 && total_fraction < 1.1);
- }
- }
- //=== power stats ===
- format=" \t%g";
- printNameFormat("pipeline_energy", total_power_pipeline);
- printNameFormat("network_energy", total_power_interconn);
- printNameFormat("bus_energy", total_power_bus);
- printNameFormat("cache_slave_L1_active_energy", total_power_slave_l1cache);
- printNameFormat("cache_master_L1_active_energy",total_power_master_l1cache);
- printNameFormat("cache_L2_active_energy", total_power_l2cache);
- printNameFormat("cache_L1_leakage", total_leakage_l1cache);
- printNameFormat("cache_L2_leakage", total_leakage_l2cache);
- if(!EnableTmprL2asSRF){
- printNameFormat("strmbuf_active_energy", total_power_stream);
- printNameFormat("strmbuf_leakage", total_leakage_stream);
- }else{
- printNameFormat("strmbuf_active_energy", 0.0);
- printNameFormat("strmbuf_leakage", 0.0);
- }
- printNameFormat("spillbuf_energy", total_power_spill);
- printNameFormat("balancer_energy", total_power_balancer);
- printNameFormat("memory_energy", total_power_mem);
- printNameFormat("memory_spill_energy", total_power_memspill);
- printNameFormat("total_on_chip_energy", total_chip_power);
- printNameFormat("total_energy", total_power);
- // === area ===
- format = " \t%g";
- if(!EnableTmprL2asSRF){
- printNameFormat("strmbuf_area", SB_area*nStrmbuf);
- }else{
- printNameFormat("strmbuf_area", 0.0);
- }
- printNameFormat("L1_area", L1_area*nL1cache);
- printNameFormat("L2_area", L2_area*nL2cache);
- printNameFormat("balancer_area", balancer_area);
- fprintf(fp, "\n");
- if (fp!=stdout)
- fclose(fp);
- }
- */
/* This routine takes the number of rows and cols of an array structure
   and attempts to make it more of a reasonable circuit structure by
   bringing the number of rows and cols as close together as possible
   (scaling both by factors of 2 in opposite directions).  It returns a
   scale factor: the amount the rows should be divided by and the columns
   multiplied by.  Only rows > cols is handled; every other shape yields 1.
*/
int squarify(int rows, int cols)
{
  if (rows == cols)
    return 1;
  /* shift is the exponent of the factor applied so far */
  for (int shift = 1; rows > cols; ++shift) {
    rows /= 2;
    cols *= 2;
    /* stop as soon as one more halving would no longer leave rows > cols */
    if (rows / 2 <= cols)
      return 1 << shift;
  }
  return 1;
}
/*
 * Variant of squarify that also handles rows < cols.
 *
 * rows is halved and cols doubled (or the reverse when cols > rows) until
 * the two cross.  The result is a power of two: >= 1 when rows started
 * larger, <= 1 when cols started larger, 1 when they are equal.
 *
 * NOTE(review): the factor returned is the one accumulated *before* the
 * step on which the dimensions cross, e.g. (8,2) and (2,8) both give 1.0.
 * Confirm that this lag relative to squarify() is intended.
 */
double squarify_new(int rows, int cols)
{
  double factor = 1.0;   /* 2^0; doubled or halved once per extra step */
  if (rows == cols)
    return factor;
  while (rows > cols) {
    rows /= 2;
    cols *= 2;
    if (rows <= cols)
      return factor;
    factor *= 2.0;
  }
  while (cols > rows) {
    rows *= 2;
    cols /= 2;
    if (cols <= rows)
      return factor;
    factor *= 0.5;
  }
  return 1;
}
- void dump_power_stats(power_result_type *power, const CpuProfile& profile)
- {
- double total_power;
- double bpred_power;
- double rename_power;
- double rat_power;
- double dcl_power;
- double lsq_power;
- double window_power;
- double wakeup_power;
- double rs_power;
- double lsq_wakeup_power;
- double lsq_rs_power;
- double regfile_power;
- double reorder_power;
- double icache_power;
- double dcache_power;
- double dcache2_power;
- double dtlb_power;
- double itlb_power;
- double ambient_power = 2.0;
- icache_power = power->icache_power;
- dcache_power = power->dcache_power;
- dcache2_power = power->dcache2_power;
- itlb_power = power->itlb;
- dtlb_power = power->dtlb;
- bpred_power = power->btb + power->local_predict + power->global_predict +
- power->chooser + power->ras;
- rat_power = power->rat_decoder +
- power->rat_wordline + power->rat_bitline + power->rat_senseamp;
- dcl_power = power->dcl_compare + power->dcl_pencode;
- rename_power = power->rat_power + power->dcl_power + power->inst_decoder_power;
- /* wakeup_power = power->wakeup_tagdrive + power->wakeup_tagmatch +
- power->wakeup_ormatch; */
- /* lym */
- wakeup_power = power->iq_wakeup_tagdrive + power->iq_wakeup_tagmatch +
- power->iq_wakeup_ormatch + power->fpq_wakeup_tagdrive + power->fpq_wakeup_tagmatch + power->fpq_wakeup_ormatch;
- /* lym */
- rs_power = power->rs_decoder +
- power->rs_wordline + power->rs_bitline + power->rs_senseamp;
- window_power = wakeup_power + rs_power + /* power->selection *//* lym */
- + power->iq_selection + power->fpq_selection
- + power->lsq_selection;
- lsq_rs_power = power->lsq_rs_decoder +
- power->lsq_rs_wordline + power->lsq_rs_bitline + power->lsq_rs_senseamp;
- lsq_wakeup_power = power->lsq_wakeup_tagdrive +
- power->lsq_wakeup_tagmatch + power->lsq_wakeup_ormatch;
- lsq_power = lsq_wakeup_power + lsq_rs_power;
- reorder_power = power->reorder_decoder +
- power->reorder_wordline + power->reorder_bitline +
- power->reorder_senseamp;
- regfile_power = power->regfile_decoder +
- power->regfile_wordline + power->regfile_bitline +
- power->regfile_senseamp;
- total_power = bpred_power + rename_power + window_power + regfile_power +
- power->resultbus + lsq_power +
- icache_power + dcache_power + dcache2_power +
- dtlb_power + itlb_power + power->clock_power + power->ialu_power +
- power->falu_power;
- fprintf(stderr,"\nProcessor Parameters:\n");
- fprintf(stderr,"Issue Width: %d\n",profile.ruu_issue_width);
- fprintf(stderr,"Window Size: %d\n",profile.RUU_size);
- fprintf(stderr,"Number of Virtual Registers: %d\n",MD_NUM_IREGS);
- fprintf(stderr,"Number of Physical Registers: %d\n",profile.RUU_size);
- fprintf(stderr,"Datapath Width: %d\n",profile.data_width);
- fprintf(stderr,"Total Power Consumption: %g\n",total_power+ambient_power);
- fprintf(stderr,"Branch Predictor Power Consumption: %g (%.3g%%)\n",bpred_power,100*bpred_power/total_power);
- fprintf(stderr," branch target buffer power (W): %g\n",power->btb);
- fprintf(stderr," local predict power (W): %g\n",power->local_predict);
- fprintf(stderr," global predict power (W): %g\n",power->global_predict);
- fprintf(stderr," chooser power (W): %g\n",power->chooser);
- fprintf(stderr," RAS power (W): %g\n",power->ras);
- fprintf(stderr,"Rename Logic Power Consumption: %g (%.3g%%)\n",rename_power,100*rename_power/total_power);
- fprintf(stderr," Instruction Decode Power (W): %g\n",power->inst_decoder_power);
- fprintf(stderr," RAT decode_power (W): %g\n",power->rat_decoder);
- fprintf(stderr," RAT wordline_power (W): %g\n",power->rat_wordline);
- fprintf(stderr," RAT bitline_power (W): %g\n",power->rat_bitline);
- fprintf(stderr," DCL Comparators (W): %g\n",power->dcl_compare);
- fprintf(stderr,"Instruction Window Power Consumption: %g (%.3g%%)\n",window_power,100*window_power/total_power);
- /* fprintf(stderr," tagdrive (W): %g\n",power->wakeup_tagdrive);
- fprintf(stderr," tagmatch (W): %g\n",power->wakeup_tagmatch);
- fprintf(stderr," Selection Logic (W): %g\n",power->selection); */
- /* lym */
- fprintf(stderr," decode_power (W): %g\n",power->rs_decoder);
- fprintf(stderr," wordline_power (W): %g\n",power->rs_wordline);
- fprintf(stderr," bitline_power (W): %g\n",power->rs_bitline);
- fprintf(stderr,"Load/Store Queue Power Consumption: %g (%.3g%%)\n",lsq_power,100*lsq_power/total_power);
- fprintf(stderr," tagdrive (W): %g\n",power->lsq_wakeup_tagdrive);
- fprintf(stderr," tagmatch (W): %g\n",power->lsq_wakeup_tagmatch);
- fprintf(stderr," decode_power (W): %g\n",power->lsq_rs_decoder);
- fprintf(stderr," wordline_power (W): %g\n",power->lsq_rs_wordline);
- fprintf(stderr," bitline_power (W): %g\n",power->lsq_rs_bitline);
- fprintf(stderr,"Arch. Register File Power Consumption: %g (%.3g%%)\n",regfile_power,100*regfile_power/total_power);
- fprintf(stderr," decode_power (W): %g\n",power->regfile_decoder);
- fprintf(stderr," wordline_power (W): %g\n",power->regfile_wordline);
- fprintf(stderr," bitline_power (W): %g\n",power->regfile_bitline);
- fprintf(stderr,"Result Bus Power Consumption: %g (%.3g%%)\n",power->resultbus,100*power->resultbus/total_power);
- fprintf(stderr,"Total Clock Power: %g (%.3g%%)\n",power->clock_power,100*power->clock_power/total_power);
- fprintf(stderr,"Int ALU Power: %g (%.3g%%)\n",power->ialu_power,100*power->ialu_power/total_power);
- fprintf(stderr,"FP ALU Power: %g (%.3g%%)\n",power->falu_power,100*power->falu_power/total_power);
- fprintf(stderr,"Instruction Cache Power Consumption: %g (%.3g%%)\n",icache_power,100*icache_power/total_power);
- fprintf(stderr," decode_power (W): %g\n",power->icache_decoder);
- fprintf(stderr," wordline_power (W): %g\n",power->icache_wordline);
- fprintf(stderr," bitline_power (W): %g\n",power->icache_bitline);
- fprintf(stderr," senseamp_power (W): %g\n",power->icache_senseamp);
- fprintf(stderr," tagarray_power (W): %g\n",power->icache_tagarray);
- fprintf(stderr,"Itlb_power (W): %g (%.3g%%)\n",power->itlb,100*power->itlb/total_power);
- fprintf(stderr,"Data Cache Power Consumption: %g (%.3g%%)\n",dcache_power,100*dcache_power/total_power);
- fprintf(stderr," decode_power (W): %g\n",power->dcache_decoder);
- fprintf(stderr," wordline_power (W): %g\n",power->dcache_wordline);
- fprintf(stderr," bitline_power (W): %g\n",power->dcache_bitline);
- fprintf(stderr," senseamp_power (W): %g\n",power->dcache_senseamp);
- fprintf(stderr," tagarray_power (W): %g\n",power->dcache_tagarray);
- fprintf(stderr,"Dtlb_power (W): %g (%.3g%%)\n",power->dtlb,100*power->dtlb/total_power);
- fprintf(stderr,"Level 2 Cache Power Consumption: %g (%.3g%%)\n",dcache2_power,100*dcache2_power/total_power);
- fprintf(stderr," decode_power (W): %g\n",power->dcache2_decoder);
- fprintf(stderr," wordline_power (W): %g\n",power->dcache2_wordline);
- fprintf(stderr," bitline_power (W): %g\n",power->dcache2_bitline);
- fprintf(stderr," senseamp_power (W): %g\n",power->dcache2_senseamp);
- fprintf(stderr," tagarray_power (W): %g\n",power->dcache2_tagarray);
- }
- /*======================================================================*/
- /*
- * This part of the code contains routines for each section as
- * described in the tech report. See the tech report for more details
- * and explanations */
- /*----------------------------------------------------------------------*/
- double driver_size(double driving_cap, double desiredrisetime) {
- double nsize, psize;
- double Rpdrive;
- Rpdrive = desiredrisetime/(driving_cap*log(VSINV)*-1.0);
- psize = restowidth(Rpdrive,PCH);
- nsize = restowidth(Rpdrive,NCH);
- if (psize > Wworddrivemax) {
- psize = Wworddrivemax;
- }
- if (psize < 4.0 * LSCALE)
- psize = 4.0 * LSCALE;
- return (psize);
- }
- /* Decoder delay: (see section 6.1 of tech report) */
- double array_decoder_power
- (
- int rows, int cols,
- double predeclength,
- int rports, int wports,
- int cache
- )
- {
- double Ctotal=0;
- double Ceq=0;
- int numstack;
- int decode_bits=0;
- int ports;
- double rowsb;
- /* read and write ports are the same here */
- ports = rports + wports;
- rowsb = (double)rows;
- /* number of input bits to be decoded */
- decode_bits=(int)ceil((logtwo(rowsb)));
- /* First stage: driving the decoders */
- /* This is the capacitance for driving one bit (and its complement).
- -There are #rowsb 3->8 decoders contributing gatecap.
- - 2.0 factor from 2 identical sets of drivers in parallel
- */
- Ceq = 2.0*(draincap(Wdecdrivep,PCH,1)+draincap(Wdecdriven,NCH,1)) +
- gatecap(Wdec3to8n+Wdec3to8p,10.0)*rowsb;
- /* There are ports * #decode_bits total */
- Ctotal+=ports*decode_bits*Ceq;
- if(verbose)
- fprintf(stderr,"Decoder -- Driving decoders == %g\n",.3*Ctotal*Powerfactor);
- /* second stage: driving a bunch of nor gates with a nand
- numstack is the size of the nor gates -- ie. a 7-128 decoder has
- 3-input NAND followed by 3-input NOR */
- numstack = (int)ceil((1.0/3.0)*logtwo(rows));
- if (numstack<=0) numstack = 1;
- if (numstack>5) numstack = 5;
- /* There are #rowsb NOR gates being driven*/
- Ceq = (3.0*draincap(Wdec3to8p,PCH,1) +draincap(Wdec3to8n,NCH,3) +
- gatecap(WdecNORn+WdecNORp,((numstack*40)+20.0)))*rowsb;
- Ctotal+=ports*Ceq;
- if(verbose)
- fprintf(stderr,"Decoder -- Driving nor w/ nand == %g\n",.3*ports*Ceq*Powerfactor);
- /* Final stage: driving an inverter with the nor
- (inverter preceding wordline driver) -- wordline driver is in the next section*/
- Ceq = (gatecap(Wdecinvn+Wdecinvp,20.0)+
- numstack*draincap(WdecNORn,NCH,1)+
- draincap(WdecNORp,PCH,numstack));
- if(verbose)
- fprintf(stderr,"Decoder -- Driving inverter w/ nor == %g\n",.3*ports*Ceq*Powerfactor);
- Ctotal+=ports*Ceq;
- /* assume Activity Factor == .3 */
- return(.3*Ctotal*Powerfactor);
- }
- double simple_array_decoder_power
- (
- int rows, int cols,
- int rports, int wports,
- int cache
- )
- {
- double predeclength=0.0;
- return(array_decoder_power(rows,cols,predeclength,rports,wports,cache));
- }
- double array_wordline_power
- (
- int rows, int cols,
- double wordlinelength,
- int rports, int wports,
- int cache
- )
- {
- double Ctotal=0;
- double Ceq=0;
- double Cline=0;
- double Cliner, Clinew=0;
- double desiredrisetime,psize,nsize;
- int ports;
- double colsb;
- ports = rports+wports;
- colsb = (double)cols;
- /* Calculate size of wordline drivers assuming rise time == Period / 8
- - estimate cap on line
- - compute min resistance to achieve this with RC
- - compute width needed to achieve this resistance */
- desiredrisetime = Period/16;
- Cline = (gatecappass(Wmemcellr,1.0))*colsb + wordlinelength*CM3metal;
- psize = driver_size(Cline,desiredrisetime);
- /* how do we want to do p-n ratioing? -- here we just assume the same ratio
- from an inverter pair */
- nsize = psize * Wdecinvn/Wdecinvp;
- if(verbose)
- fprintf(stderr,"Wordline Driver Sizes -- nsize == %f, psize == %f\n",nsize,psize);
- Ceq = draincap(Wdecinvn,NCH,1) + draincap(Wdecinvp,PCH,1) +
- gatecap(nsize+psize,20.0);
- Ctotal+=ports*Ceq;
- if(verbose)
- fprintf(stderr,"Wordline -- Inverter -> Driver == %g\n",ports*Ceq*Powerfactor);
- /* Compute caps of read wordline and write wordlines
- - wordline driver caps, given computed width from above
- - read wordlines have 1 nmos access tx, size ~4
- - write wordlines have 2 nmos access tx, size ~2
- - metal line cap
- */
- Cliner = (gatecappass(Wmemcellr,(BitWidth-2*Wmemcellr)/2.0))*colsb+
- wordlinelength*CM3metal+
- 2.0*(draincap(nsize,NCH,1) + draincap(psize,PCH,1));
- Clinew = (2.0*gatecappass(Wmemcellw,(BitWidth-2*Wmemcellw)/2.0))*colsb+
- wordlinelength*CM3metal+
- 2.0*(draincap(nsize,NCH,1) + draincap(psize,PCH,1));
- if(verbose) {
- fprintf(stderr,"Wordline -- Line == %g\n",1e12*Cline);
- fprintf(stderr,"Wordline -- Line -- access -- gatecap == %g\n",1e12*colsb*2*gatecappass(Wmemcella,(BitWidth-2*Wmemcella)/2.0));
- fprintf(stderr,"Wordline -- Line -- driver -- draincap == %g\n",1e12*draincap(nsize,NCH,1) + draincap(psize,PCH,1));
- fprintf(stderr,"Wordline -- Line -- metal == %g\n",1e12*wordlinelength*CM3metal);
- }
- Ctotal+=rports*Cliner+wports*Clinew;
- /* AF == 1 assuming a different wordline is charged each cycle, but only
- 1 wordline (per port) is actually used */
- return(Ctotal*Powerfactor);
- }
- double simple_array_wordline_power
- (
- int rows, int cols,
- int rports, int wports,
- int cache
- )
- {
- double wordlinelength;
- int ports = rports + wports;
- wordlinelength = cols * (RegCellWidth + 2 * ports * BitlineSpacing);
- return(array_wordline_power(rows,cols,wordlinelength,rports,wports,cache));
- }
- double array_bitline_power
- (
- int rows, int cols,
- double bitlinelength,
- int rports, int wports,
- int cache
- )
- {
- double Ctotal=0;
- double Ccolmux=0;
- double Cbitrowr=0;
- double Cbitroww=0;
- double Cprerow=0;
- double Cwritebitdrive=0;
- double Cpregate=0;
- double Cliner=0;
- double Clinew=0;
- int ports;
- double rowsb;
- double colsb;
- double desiredrisetime, Cline, psize, nsize;
- ports = rports + wports;
- rowsb = (double)rows;
- colsb = (double)cols;
- /* Draincaps of access tx's */
- Cbitrowr = draincap(Wmemcellr,NCH,1);
- Cbitroww = draincap(Wmemcellw,NCH,1);
- /* Cprerow -- precharge cap on the bitline
- -simple scheme to estimate size of pre-charge tx's in a similar fashion
- to wordline driver size estimation.
- -FIXME: it would be better to use precharge/keeper pairs, i've omitted this
- from this version because it couldn't autosize as easily.
- */
- desiredrisetime = Period/8;
- Cline = rowsb*Cbitrowr+CM2metal*bitlinelength;
- psize = driver_size(Cline,desiredrisetime);
- /* compensate for not having an nmos pre-charging */
- psize = psize + psize * Wdecinvn/Wdecinvp;
- if(verbose)
- printf("Cprerow auto == %g (psize == %g)\n",draincap(psize,PCH,1),psize);
- Cprerow = draincap(psize,PCH,1);
- /* Cpregate -- cap due to gatecap of precharge transistors -- tack this
- onto bitline cap, again this could have a keeper */
- Cpregate = 4.0*gatecap(psize,10.0);
- global_clockcap+=rports*cols*2.0*Cpregate;
- /* Cwritebitdrive -- write bitline drivers are used instead of the precharge
- stuff for write bitlines
- - 2 inverter drivers within each driver pair */
- Cline = rowsb*Cbitroww+CM2metal*bitlinelength;
- psize = driver_size(Cline,desiredrisetime);
- nsize = psize * Wdecinvn/Wdecinvp;
- Cwritebitdrive = 2.0*(draincap(psize,PCH,1)+draincap(nsize,NCH,1));
- /*
- reg files (cache==0)
- => single ended bitlines (1 bitline/col)
- => AFs from pop_count
- caches (cache ==1)
- => double-ended bitlines (2 bitlines/col)
- => AFs = .5 (since one of the two bitlines is always charging/discharging)
- */
- #ifdef STATIC_AF
- if (cache == 0) {
- /* compute the total line cap for read/write bitlines */
- Cliner = rowsb*Cbitrowr+CM2metal*bitlinelength+Cprerow;
- Clinew = rowsb*Cbitroww+CM2metal*bitlinelength+Cwritebitdrive;
- /* Bitline inverters at the end of the bitlines (replaced w/ sense amps
- in cache styles) */
- Ccolmux = gatecap(MSCALE*(29.9+7.8),0.0)+gatecap(MSCALE*(47.0+12.0),0.0);
- Ctotal+=(1.0-POPCOUNT_AF)*rports*cols*(Cliner+Ccolmux+2.0*Cpregate);
- Ctotal+=.3*wports*cols*(Clinew+Cwritebitdrive);
- }
- else {
- Cliner = rowsb*Cbitrowr+CM2metal*bitlinelength+Cprerow + draincap(Wbitmuxn,NCH,1);
- Clinew = rowsb*Cbitroww+CM2metal*bitlinelength+Cwritebitdrive;
- Ccolmux = (draincap(Wbitmuxn,NCH,1))+2.0*gatecap(WsenseQ1to4,10.0);
- Ctotal+=.5*rports*2.0*cols*(Cliner+Ccolmux+2.0*Cpregate);
- Ctotal+=.5*wports*2.0*cols*(Clinew+Cwritebitdrive);
- }
- #else
- if (cache == 0) {
- /* compute the total line cap for read/write bitlines */
- Cliner = rowsb*Cbitrowr+CM2metal*bitlinelength+Cprerow;
- Clinew = rowsb*Cbitroww+CM2metal*bitlinelength+Cwritebitdrive;
- /* Bitline inverters at the end of the bitlines (replaced w/ sense amps
- in cache styles) */
- Ccolmux = gatecap(MSCALE*(29.9+7.8),0.0)+gatecap(MSCALE*(47.0+12.0),0.0);
- Ctotal += rports*cols*(Cliner+Ccolmux+2.0*Cpregate);
- Ctotal += .3*wports*cols*(Clinew+Cwritebitdrive);
- }
- else {
- Cliner = rowsb*Cbitrowr+CM2metal*bitlinelength+Cprerow + draincap(Wbitmuxn,NCH,1);
- Clinew = rowsb*Cbitroww+CM2metal*bitlinelength+Cwritebitdrive;
- Ccolmux = (draincap(Wbitmuxn,NCH,1))+2.0*gatecap(WsenseQ1to4,10.0);
- Ctotal+=.5*rports*2.0*cols*(Cliner+Ccolmux+2.0*Cpregate);
- Ctotal+=.5*wports*2.0*cols*(Clinew+Cwritebitdrive);
- }
- #endif
- if(verbose) {
- fprintf(stderr,"Bitline -- Precharge == %g\n",1e12*Cpregate);
- fprintf(stderr,"Bitline -- Line == %g\n",1e12*(Cliner+Clinew));
- fprintf(stderr,"Bitline -- Line -- access draincap == %g\n",1e12*rowsb*Cbitrowr);
- fprintf(stderr,"Bitline -- Line -- precharge draincap == %g\n",1e12*Cprerow);
- fprintf(stderr,"Bitline -- Line -- metal == %g\n",1e12*bitlinelength*CM2metal);
- fprintf(stderr,"Bitline -- Colmux == %g\n",1e12*Ccolmux);
- fprintf(stderr,"\n");
- }
- if(cache==0)
- return(Ctotal*Powerfactor);
- else
- return(Ctotal*SensePowerfactor*.4);
- }
- double simple_array_bitline_power
- (
- int rows, int cols,
- int rports, int wports,
- int cache
- )
- {
- double bitlinelength;
- int ports = rports + wports;
- bitlinelength = rows * (RegCellHeight + ports * WordlineSpacing);
- return (array_bitline_power(rows,cols,bitlinelength,rports,wports,cache));
- }
- /* estimate senseamp power dissipation in cache structures (Zyuban's method) */
- double senseamp_power(int cols)
- {
- return((double)cols * Vdd/8 * .5e-3);
- }
- /* estimate comparator power consumption (this comparator is similar
- to the tag-match structure in a CAM */
- double compare_cap(int compare_bits)
- {
- double c1, c2;
- /* bottom part of comparator */
- c2 = (compare_bits)*(draincap(Wcompn,NCH,1)+draincap(Wcompn,NCH,2))+
- draincap(Wevalinvp,PCH,1) + draincap(Wevalinvn,NCH,1);
- /* top part of comparator */
- c1 = (compare_bits)*(draincap(Wcompn,NCH,1)+draincap(Wcompn,NCH,2)+
- draincap(Wcomppreequ,NCH,1)) +
- gatecap(WdecNORn,1.0)+
- gatecap(WdecNORp,3.0);
- return(c1 + c2);
- }
- /* power of depency check logic */
- double dcl_compare_power(int compare_bits, const CpuProfile& profile)
- {
- double Ctotal;
- int num_comparators;
- num_comparators = (profile.ruu_decode_width - 1) * (profile.ruu_decode_width);
- Ctotal = num_comparators * compare_cap(compare_bits);
- return(Ctotal*Powerfactor*AF);
- }
- double simple_array_power
- (
- int rows, int cols,
- int rports, int wports,
- int cache
- )
- {
- if(cache==0)
- return( simple_array_decoder_power(rows,cols,rports,wports,cache)+
- simple_array_wordline_power(rows,cols,rports,wports,cache)+
- simple_array_bitline_power(rows,cols,rports,wports,cache));
- else
- return( simple_array_decoder_power(rows,cols,rports,wports,cache)+
- simple_array_wordline_power(rows,cols,rports,wports,cache)+
- simple_array_bitline_power(rows,cols,rports,wports,cache)+
- senseamp_power(cols));
- }
- double cam_tagdrive
- (
- int rows, int cols,
- int rports, int wports
- )
- {
- double Ctotal, Ctlcap, Cblcap, Cwlcap;
- double taglinelength;
- double wordlinelength;
- double nsize, psize;
- int ports;
- Ctotal=0;
- ports = rports + wports;
- taglinelength = rows *
- (CamCellHeight + ports * MatchlineSpacing);
- wordlinelength = cols *
- (CamCellWidth + ports * TaglineSpacing);
- /* Compute tagline cap */
- Ctlcap = Cmetal * taglinelength +
- rows * gatecappass(Wcomparen2,2.0) +
- draincap(Wcompdrivern,NCH,1)+draincap(Wcompdriverp,PCH,1);
- /* Compute bitline cap (for writing new tags) */
- Cblcap = Cmetal * taglinelength +
- rows * draincap(Wmemcellr,NCH,2);
- /* autosize wordline driver */
- psize = driver_size(Cmetal * wordlinelength + 2 * cols * gatecap(Wmemcellr,2.0),Period/8);
- nsize = psize * Wdecinvn/Wdecinvp;
- /* Compute wordline cap (for writing new tags) */
- Cwlcap = Cmetal * wordlinelength +
- draincap(nsize,NCH,1)+draincap(psize,PCH,1) +
- 2 * cols * gatecap(Wmemcellr,2.0);
- Ctotal += (rports * cols * 2 * Ctlcap) +
- (wports * ((cols * 2 * Cblcap) + (rows * Cwlcap)));
- return(Ctotal*Powerfactor*AF);
- }
- double cam_tagmatch
- (
- int rows, int cols,
- int rports, int wports,
- int issue_width
- )
- {
- double Ctotal, Cmlcap;
- double matchlinelength;
- int ports;
- Ctotal=0;
- ports = rports + wports;
- matchlinelength = cols *
- (CamCellWidth + ports * TaglineSpacing);
- Cmlcap = 2 * cols * draincap(Wcomparen1,NCH,2) +
- Cmetal * matchlinelength + draincap(Wmatchpchg,NCH,1) +
- gatecap(Wmatchinvn+Wmatchinvp,10.0) +
- gatecap(Wmatchnandn+Wmatchnandp,10.0);
- Ctotal += rports * rows * Cmlcap;
- global_clockcap += rports * rows * gatecap(Wmatchpchg,5.0);
- /* noring the nanded match lines */
- if(issue_width >= 8) /* lym */
- Ctotal += 2 * gatecap(Wmatchnorn+Wmatchnorp,10.0);
- return(Ctotal*Powerfactor*AF);
- }
- double cam_array
- (
- int rows, int cols,
- int rports, int wports,
- int issuewidth
- )
- {
- return(cam_tagdrive(rows,cols,rports,wports) +
- cam_tagmatch(rows,cols,rports,wports,issuewidth)); /* lym */
- }
- double selection_power(int win_entries, int issue_width) /* lym */
- {
- double Ctotal, Cor, Cpencode;
- int num_arbiter=1;
- Ctotal=0;
- while(win_entries > 4)
- {
- win_entries = (int)ceil((double)win_entries / 4.0);
- num_arbiter += win_entries;
- }
- Cor = 4 * draincap(WSelORn,NCH,1) + draincap(WSelORprequ,PCH,1);
- Cpencode = draincap(WSelPn,NCH,1) + draincap(WSelPp,PCH,1) +
- 2*draincap(WSelPn,NCH,1) + draincap(WSelPp,PCH,2) +
- 3*draincap(WSelPn,NCH,1) + draincap(WSelPp,PCH,3) +
- 4*draincap(WSelPn,NCH,1) + draincap(WSelPp,PCH,4) +
- 4*gatecap(WSelEnn+WSelEnp,20.0) +
- 4*draincap(WSelEnn,NCH,1) + 4*draincap(WSelEnp,PCH,1);
- Ctotal += issue_width * num_arbiter*(Cor+Cpencode); /* lym */
- return(Ctotal*Powerfactor*AF);
- }
- /* very rough clock power estimates */
- double total_clockpower(double die_length, const CpuProfile& profile)
- {
- double clocklinelength;
- double Cline,Cline2,Ctotal;
- double pipereg_clockcap=0;
- double global_buffercap = 0;
- double Clockpower;
- double num_piperegs;
- /* int npreg_width = (int)ceil(logtwo((double)profile.RUU_size)); */ /* lym */
- /* Assume say 8 stages (kinda low now).
- FIXME: this could be a lot better; user could input
- number of pipestages, etc */
- /* assume 8 pipe stages and try to estimate bits per pipe stage */
- /* pipe stage 0/1 */
- num_piperegs = profile.ruu_issue_width*inst_length + profile.data_width;
- /* pipe stage 1/2 */
- num_piperegs += profile.ruu_issue_width*(inst_length + 3 * profile.RUU_size);
- /* pipe stage 2/3 */
- num_piperegs += profile.ruu_issue_width*(inst_length + 3 * profile.RUU_size);
- /* pipe stage 3/4 */
- num_piperegs += profile.ruu_issue_width*(3 * npreg_width + pow2(opcode_length));
- /* pipe stage 4/5 */
- num_piperegs += profile.ruu_issue_width*(2*profile.data_width + pow2(opcode_length));
- /* pipe stage 5/6 */
- num_piperegs += profile.ruu_issue_width*(profile.data_width + pow2(opcode_length));
- /* pipe stage 6/7 */
- num_piperegs += profile.ruu_issue_width*(profile.data_width + pow2(opcode_length));
- /* pipe stage 7/8 */
- num_piperegs += profile.ruu_issue_width*(profile.data_width + pow2(opcode_length));
- /* assume 50% extra in control signals (rule of thumb) */
- num_piperegs = num_piperegs * 1.5;
- pipereg_clockcap = num_piperegs * 4*gatecap(10.0,0);
- /* estimate based on 3% of die being in clock metal */
- Cline2 = Cmetal * (.03 * die_length * die_length/BitlineSpacing) * 1e6 * 1e6;
- /* another estimate */
- clocklinelength = die_length*(.5 + 4 * (.25 + 2*(.25) + 4 * (.125)));
- Cline = 20 * Cmetal * (clocklinelength) * 1e6;
- global_buffercap = 12*gatecap(1000.0,10.0)+16*gatecap(200,10.0)+16*8*2*gatecap(100.0,10.00) + 2*gatecap(.29*1e6,10.0);
- /* global_clockcap is computed within each array structure for pre-charge tx's*/
- Ctotal = Cline+global_clockcap+pipereg_clockcap+global_buffercap;
- if(verbose)
- fprintf(stderr,"num_piperegs == %f\n",num_piperegs);
- /* add I_ADD Clockcap and F_ADD Clockcap */
- Clockpower = Ctotal*Powerfactor + profile.res_ialu*I_ADD_CLOCK + profile.res_fpalu*F_ADD_CLOCK;
- if(verbose) {
- fprintf(stderr,"Global Clock Power: %g\n",Clockpower);
- fprintf(stderr," Global Metal Lines (W): %g\n",Cline*Powerfactor);
- fprintf(stderr," Global Metal Lines (3%%) (W): %g\n",Cline2*Powerfactor);
- fprintf(stderr," Global Clock Buffers (W): %g\n",global_buffercap*Powerfactor);
- fprintf(stderr," Global Clock Cap (Explicit) (W): %g\n",global_clockcap*Powerfactor+I_ADD_CLOCK+F_ADD_CLOCK);
- fprintf(stderr," Global Clock Cap (Implicit) (W): %g\n",pipereg_clockcap*Powerfactor);
- }
- return(Clockpower);
- }
- /* very rough global clock power estimates */
- double global_clockpower(double die_length)
- {
- double clocklinelength;
- double Cline,Cline2,Ctotal;
- double global_buffercap = 0;
- Cline2 = Cmetal * (.03 * die_length * die_length/BitlineSpacing) * 1e6 * 1e6;
- clocklinelength = die_length*(.5 + 4 * (.25 + 2*(.25) + 4 * (.125)));
- Cline = 20 * Cmetal * (clocklinelength) * 1e6;
- global_buffercap = 12*gatecap(1000.0,10.0)+16*gatecap(200,10.0)+16*8*2*gatecap(100.0,10.00) + 2*gatecap(.29*1e6,10.0);
- Ctotal = Cline+global_buffercap;
- if(verbose) {
- fprintf(stderr,"Global Clock Power: %g\n",Ctotal*Powerfactor);
- fprintf(stderr," Global Metal Lines (W): %g\n",Cline*Powerfactor);
- fprintf(stderr," Global Metal Lines (3%%) (W): %g\n",Cline2*Powerfactor);
- fprintf(stderr," Global Clock Buffers (W): %g\n",global_buffercap*Powerfactor);
- }
- return(Ctotal*Powerfactor);
- }
- double compute_resultbus_power(const CpuProfile& profile)
- {
- double Ctotal, Cline;
- double regfile_height;
- /* compute size of result bus tags */
- /* int npreg_width = (int)ceil(logtwo((double)profile.RUU_size)); */ /* lym */
- Ctotal=0;
- /*Changed to account for alpha ev6 clustering. Right now the length will
- be to the integer ruu*/
- regfile_height = profile.RUU_size * (RegCellHeight +
- WordlineSpacing * 10);
- /* assume num alu's == ialu (FIXME: generate a more detailed result bus network model*/
- Cline = Cmetal * (regfile_height + .5 * profile.res_ialu * 3200.0 * LSCALE);
- /* or use result bus length measured from 21264 die photo */
- /* Cline = Cmetal * 3.3*1000;*/
- /* Assume profile.ruu_issue_width result busses -- power can be scaled linearly
- for number of result busses (scale by writeback_access) */
- Ctotal += 2.0 * (profile.data_width + npreg_width) * 6* Cline; /* changed for ev6 */
- #ifdef STATIC_AF
- return(Ctotal*Powerfactor*AF);
- #else
- return(Ctotal*Powerfactor);
- #endif
- }
- void calculate_power(power_result_type *power, const CpuProfile& profile)
- {
- Mhz = profile.Mhz;
- Vdd = profile.vdd;
- double clockpower;
- double predeclength, wordlinelength, bitlinelength;
- int cache;
- //int ndwl, ndbl, nspd, ntwl, ntbl, ntspd, c,b,a,cache, rowsb, colsb;
- //int trowsb, tcolsb, tagsize;
- //int va_size = 48;
- /* int npreg_width = (int)ceil(logtwo((double)profile.RUU_size)); *//* lym */
- /* these variables are needed to use Cacti to auto-size cache arrays
- (for optimal delay) */
- // time_result_type time_result;
- //time_parameter_type time_parameters;
- /* used to autosize other structures, like bpred tables */
- //int scale_factor;
- global_clockcap = 0;
- cache=0;
- #ifdef DYNAMIC_AF
- //double window_af_b, lsq_af_b;
- //window_af_b = compute_af(window_num_pop_count_cycle,window_total_pop_count_cycle,data_width);
- //lsq_af_b = compute_af(lsq_num_pop_count_cycle,lsq_total_pop_count_cycle,data_width);
- power->regfile_af_b =
- compute_af( regfile_num_pop_count_cycle,
- regfile_total_pop_count_cycle,
- profile.data_width);
- power->resultbus_af_b =
- compute_af(resultbus_num_pop_count_cycle,
- resultbus_total_pop_count_cycle,
- profile.data_width);
- #endif
- /* FIXME: ALU power is a simple constant, it would be better
- to include bit AFs and have different numbers for different
- types of operations */
- power->ialu_power = profile.res_ialu * I_ADD;
- power->falu_power = profile.res_fpalu * F_ADD;
- nvreg_width = (int)ceil(logtwo((double)MD_NUM_IREGS));
- npreg_width = (int)ceil(logtwo((double)profile.RUU_size));
- /* RAT has shadow bits stored in each cell, this makes the
- cell size larger than normal array structures, so we must
- compute it here */
- predeclength = MD_NUM_IREGS *
- (RatCellHeight + 3 * profile.ruu_decode_width * WordlineSpacing);
- wordlinelength = npreg_width *
- (RatCellWidth +
- 6 * profile.ruu_decode_width * BitlineSpacing +
- RatShiftRegWidth*RatNumShift);
- bitlinelength = MD_NUM_IREGS * (RatCellHeight + 3 * profile.ruu_decode_width * WordlineSpacing);
- if(verbose)
- fprintf(stderr,"rat power stats\n");
- /*changed to a 80 entry cam, since it says so in the 21264 micro paper*/
- power->rat_decoder = cam_tagdrive(profile.RUU_size,
- npreg_width,
- profile.ruu_decode_width,
- profile.ruu_decode_width);
- power->rat_wordline = cam_tagmatch(profile.RUU_size,
- npreg_width,
- profile.ruu_decode_width,
- profile.ruu_decode_width,
- profile.ruu_decode_width);
- power->rat_bitline = 0;
- power->rat_senseamp = 0;
- power->dcl_compare = dcl_compare_power(nvreg_width, profile);
- power->dcl_pencode = 0;
- power->inst_decoder_power = profile.ruu_decode_width * simple_array_decoder_power(opcode_length,1,1,1,cache);
- /* power->wakeup_tagdrive =cam_tagdrive(profile.RUU_size,npreg_width,profile.ruu_issue_width,profile.ruu_issue_width);
- power->wakeup_tagmatch =cam_tagmatch(profile.RUU_size,npreg_width,profile.ruu_issue_width,profile.ruu_issue_width);
- power->wakeup_ormatch =0; */
- /* lym
- power->iq_wakeup_tagdrive =cam_tagdrive(IQ_size,npreg_width,iq_issue_width,iq_issue_width);
- power->iq_wakeup_tagmatch =cam_tagmatch(IQ_size,npreg_width,iq_issue_width,iq_issue_width,iq_issue_width);
- power->iq_wakeup_ormatch =0;
- power->fpq_wakeup_tagdrive =cam_tagdrive(FPQ_size,npreg_width,fpq_issue_width,fpq_issue_width);
- power->fpq_wakeup_tagmatch =cam_tagmatch(FPQ_size,npreg_width,fpq_issue_width,fpq_issue_width,fpq_issue_width);
- power->fpq_wakeup_ormatch =0;
- lym */
- /* power->selection = selection_power(profile.RUU_size); */
- /* lym
- power->iq_selection = selection_power(IQ_size,iq_issue_width);
- power->fpq_selection = selection_power(FPQ_size,fpq_issue_width);
- power->lsq_selection = selection_power(profile.LSQ_size,lsq_issue_width);
- lym */
- /*special numbers for alpha 21264*/
- /*I'm reducing issue width to 1/3 of the normal, since every cluster is just 2 wide
- and then I'm just going to add the 2 integer clusters up, leaving the fp cluster aside for the
- moment*/
- predeclength = MD_NUM_IREGS * (RegCellHeight + 2 * profile.ruu_issue_width * WordlineSpacing);
- wordlinelength = profile.data_width *
- (RegCellWidth +
- 4 * profile.ruu_issue_width * BitlineSpacing);
- bitlinelength = MD_NUM_IREGS * (RegCellHeight + 2 * profile.ruu_issue_width* WordlineSpacing);
- if(verbose)
- fprintf(stderr,"regfile power stats\n");
- power->regfile_decoder = array_decoder_power(MD_NUM_IREGS,profile.data_width,predeclength,
- 4/3*profile.ruu_issue_width ,profile.ruu_issue_width*2/3,cache);
- power->regfile_wordline = array_wordline_power(MD_NUM_IREGS,profile.data_width,wordlinelength,
- 4/3*profile.ruu_issue_width,profile.ruu_issue_width*2/3 ,cache);
- power->regfile_bitline = array_bitline_power(MD_NUM_IREGS,profile.data_width,bitlinelength,
- 4/3*profile.ruu_issue_width,profile.ruu_issue_width*2/3,cache);
- power->regfile_senseamp =0;
- /*
- predeclength = MD_NUM_IREGS * (RegCellHeight + 3 * ruu_issue_width * WordlineSpacing);
- wordlinelength = profile.data_width *
- (RegCellWidth +
- 6 * ruu_issue_width * BitlineSpacing);
- bitlinelength = MD_NUM_IREGS * (RegCellHeight + 3 * ruu_issue_width * WordlineSpacing);
- if(verbose)
- fprintf(stderr,"regfile power stats\n");
- power->regfile_decoder = array_decoder_power(MD_NUM_IREGS,profile.data_width,predeclength,2*profile.ruu_issue_width,profile.ruu_issue_width,cache);
- power->regfile_wordline = array_wordline_power(MD_NUM_IREGS,profile.data_width,wordlinelength,2*profile.ruu_issue_width,profile.ruu_issue_width,cache);
- power->regfile_bitline = array_bitline_power(MD_NUM_IREGS,profile.data_width,bitlinelength,2*profile.ruu_issue_width,profile.ruu_issue_width,cache);
- power->regfile_senseamp =0;
- */
- /*Again for alpha*/
- /*for int_window*/
- /*4 Read ports, 6 Write ports per int regfile*/
- predeclength = profile.RUU_size * (RegCellHeight + /*2 * ruu_issue_width */10 * WordlineSpacing);
- wordlinelength = profile.data_width *
- (RegCellWidth +
- /*4 * ruu_issue_width */20* BitlineSpacing);
- bitlinelength = profile.RUU_size * (RegCellHeight +/* 2 * ruu_issue_width*/ 10* WordlineSpacing);
- if(verbose)
- fprintf(stderr,"res station power stats\n");
- power->rs_decoder = array_decoder_power(profile.RUU_size,profile.data_width,predeclength,4,6/*2/3*profile.ruu_issue_width,profile.ruu_issue_width*/,cache);
- power->rs_wordline = array_wordline_power(profile.RUU_size,profile.data_width,wordlinelength,4,6/*2/3*profile.ruu_issue_width,profile.ruu_issue_width*/,cache);
- power->rs_bitline = array_bitline_power(profile.RUU_size,profile.data_width,bitlinelength,4,6/*2/3*profile.ruu_issue_width,profile.ruu_issue_width*/,cache);
- /*for fp_window*/
- /*4W, 4R*/
- predeclength = 72 * (RegCellHeight + /*profile.ruu_issue_width*/8 * WordlineSpacing);
- wordlinelength = profile.data_width *
- (RegCellWidth +
- /*2 * profile.ruu_issue_width*/16 * BitlineSpacing);
- bitlinelength = 72 * (RegCellHeight + /*profile.ruu_issue_width*/8 * WordlineSpacing);
- if(verbose)
- fprintf(stderr,"res station power stats\n");
- /*power->rs_decoder*/fp_reg_power += array_decoder_power(72,profile.data_width,predeclength,4,4/*2/3*profile.ruu_issue_width,2/3*profile.ruu_issue_width*/,cache);
- /*power->rs_wordline*/fp_reg_power += array_wordline_power(72,profile.data_width,wordlinelength,4,4/*2/3*profile.ruu_issue_width,2/3*profile.ruu_issue_width*/,cache);
- /*power->rs_bitline*/fp_reg_power += array_bitline_power(72,profile.data_width,bitlinelength,4,4 /*2/3*profile.ruu_issue_width,2/3*profile.ruu_issue_width*/,cache);
- /*
- predeclength = profile.RUU_size * (RegCellHeight + 3 * profile.ruu_issue_width * WordlineSpacing);
- wordlinelength = profile.data_width *
- (RegCellWidth +
- 6 * profile.ruu_issue_width * BitlineSpacing);
- bitlinelength = profile.RUU_size * (RegCellHeight + 3 * profile.ruu_issue_width * WordlineSpacing);
- if(verbose)
- fprintf(stderr,"res station power stats\n");
- power->rs_decoder = array_decoder_power(profile.RUU_size,profile.data_width,predeclength,2*profile.ruu_issue_width,profile.ruu_issue_width,cache);
- power->rs_wordline = array_wordline_power(profile.RUU_size,profile.data_width,wordlinelength,2*profile.ruu_issue_width,profile.ruu_issue_width,cache);
- power->rs_bitline = array_bitline_power(profile.RUU_size,profile.data_width,bitlinelength,2*profile.ruu_issue_width,profile.ruu_issue_width,cache);
- */
- /* no senseamps in reg file structures (only caches) */
- power->rs_senseamp =0;
- /* addresses go into lsq tag's
- power->lsq_wakeup_tagdrive =cam_tagdrive(profile.LSQ_size,profile.data_width,profile.res_memport,profile.res_memport);
- power->lsq_wakeup_tagmatch =cam_tagmatch(profile.LSQ_size,profile.data_width,profile.res_memport,profile.res_memport,lsq_issue_width);
- power->lsq_wakeup_ormatch =0;
- */
- wordlinelength = profile.data_width *
- (RegCellWidth +
- 4 * profile.res_memport * BitlineSpacing);
- bitlinelength = profile.RUU_size * (RegCellHeight + 4 * profile.res_memport * WordlineSpacing);
- /* rs's hold data */
- if(verbose)
- fprintf(stderr,"lsq station power stats\n");
- power->lsq_rs_decoder = array_decoder_power(profile.LSQ_size,profile.data_width,predeclength,profile.res_memport,profile.res_memport,cache);
- power->lsq_rs_wordline = array_wordline_power(profile.LSQ_size,profile.data_width,wordlinelength,profile.res_memport,profile.res_memport,cache);
- power->lsq_rs_bitline = array_bitline_power(profile.LSQ_size,profile.data_width,bitlinelength,profile.res_memport,profile.res_memport,cache);
- power->lsq_rs_senseamp =0;
- power->resultbus = compute_resultbus_power(profile);
- /*
- // Load cache values into what cacti is expecting
- time_parameters.cache_size = btb_config[0] * (profile.data_width/8) * btb_config[1]; // C
- time_parameters.block_size = (profile.data_width/8); // B
- time_parameters.associativity = btb_config[1]; // A
- time_parameters.number_of_sets = btb_config[0]; // C/(B*A)
- // have Cacti compute optimal cache config
- calculate_time(&time_result,&time_parameters);
- output_data(&time_result,&time_parameters);
- // extract Cacti results
- ndwl=time_result.best_Ndwl;
- ndbl=time_result.best_Ndbl;
- nspd=time_result.best_Nspd;
- ntwl=time_result.best_Ntwl;
- ntbl=time_result.best_Ntbl;
- ntspd=time_result.best_Ntspd;
- c = time_parameters.cache_size;
- b = time_parameters.block_size;
- a = time_parameters.associativity;
- cache=1;
- // Figure out how many rows/cols there are now
- rowsb = c/(b*a*ndbl*nspd);
- colsb = 8*b*a*nspd/ndwl;
- if(verbose) {
- fprintf(stderr,"%d KB %d-way btb (%d-byte block size):\n",c,a,b);
- fprintf(stderr,"ndwl == %d, ndbl == %d, nspd == %d\n",ndwl,ndbl,nspd);
- fprintf(stderr,"%d sets of %d rows x %d cols\n",ndwl*ndbl,rowsb,colsb);
- }
- predeclength = rowsb * (RegCellHeight + WordlineSpacing);
- wordlinelength = colsb * (RegCellWidth + BitlineSpacing);
- bitlinelength = rowsb * (RegCellHeight + WordlineSpacing);
- if(verbose)
- fprintf(stderr,"btb power stats\n");
- power->btb = ndwl*ndbl*(array_decoder_power(rowsb,colsb,predeclength,1,1,cache) + array_wordline_power(rowsb,colsb,wordlinelength,1,1,cache) + array_bitline_power(rowsb,colsb,bitlinelength,1,1,cache) + senseamp_power(colsb));
- cache=1;
- scale_factor = squarify(twolev_config[0],twolev_config[2]);
- predeclength = (twolev_config[0] / scale_factor)* (RegCellHeight + WordlineSpacing);
- wordlinelength = twolev_config[2] * scale_factor * (RegCellWidth + BitlineSpacing);
- bitlinelength = (twolev_config[0] / scale_factor) * (RegCellHeight + WordlineSpacing);
- if(verbose)
- fprintf(stderr,"local predict power stats\n");
- power->local_predict = array_decoder_power(twolev_config[0]/scale_factor,twolev_config[2]*scale_factor,predeclength,1,1,cache) + array_wordline_power(twolev_config[0]/scale_factor,twolev_config[2]*scale_factor,wordlinelength,1,1,cache) + array_bitline_power(twolev_config[0]/scale_factor,twolev_config[2]*scale_factor,bitlinelength,1,1,cache) + senseamp_power(twolev_config[2]*scale_factor);
- scale_factor = squarify(twolev_config[1],3);
- predeclength = (twolev_config[1] / scale_factor)* (RegCellHeight + WordlineSpacing);
- wordlinelength = 3 * scale_factor * (RegCellWidth + BitlineSpacing);
- bitlinelength = (twolev_config[1] / scale_factor) * (RegCellHeight + WordlineSpacing);
- if(verbose)
- fprintf(stderr,"local predict power stats\n");
- power->local_predict += array_decoder_power(twolev_config[1]/scale_factor,3*scale_factor,predeclength,1,1,cache) + array_wordline_power(twolev_config[1]/scale_factor,3*scale_factor,wordlinelength,1,1,cache) + array_bitline_power(twolev_config[1]/scale_factor,3*scale_factor,bitlinelength,1,1,cache) + senseamp_power(3*scale_factor);
- if(verbose)
- fprintf(stderr,"bimod_config[0] == %d\n",bimod_config[0]);
- scale_factor = squarify(bimod_config[0],2);
- predeclength = bimod_config[0]/scale_factor * (RegCellHeight + WordlineSpacing);
- wordlinelength = 2*scale_factor * (RegCellWidth + BitlineSpacing);
- bitlinelength = bimod_config[0]/scale_factor * (RegCellHeight + WordlineSpacing);
- if(verbose)
- fprintf(stderr,"global predict power stats\n");
- power->global_predict = array_decoder_power(bimod_config[0]/scale_factor,2*scale_factor,predeclength,1,1,cache) + array_wordline_power(bimod_config[0]/scale_factor,2*scale_factor,wordlinelength,1,1,cache) + array_bitline_power(bimod_config[0]/scale_factor,2*scale_factor,bitlinelength,1,1,cache) + senseamp_power(2*scale_factor);
- scale_factor = squarify(comb_config[0],2);
- predeclength = comb_config[0]/scale_factor * (RegCellHeight + WordlineSpacing);
- wordlinelength = 2*scale_factor * (RegCellWidth + BitlineSpacing);
- bitlinelength = comb_config[0]/scale_factor * (RegCellHeight + WordlineSpacing);
- if(verbose)
- fprintf(stderr,"chooser predict power stats\n");
- power->chooser = array_decoder_power(comb_config[0]/scale_factor,2*scale_factor,predeclength,1,1,cache) + array_wordline_power(comb_config[0]/scale_factor,2*scale_factor,wordlinelength,1,1,cache) + array_bitline_power(comb_config[0]/scale_factor,2*scale_factor,bitlinelength,1,1,cache) + senseamp_power(2*scale_factor);
- */
- if(verbose)
- fprintf(stderr,"RAS predict power stats\n");
- power->ras = simple_array_power(ras_size,profile.data_width,1,1,0);
- /*
- tagsize = va_size - ((int)logtwo(cache_dl1->nsets) + (int)logtwo(cache_dl1->bsize));
- if(verbose)
- fprintf(stderr,"dtlb predict power stats\n");
- power->dtlb = profile.res_memport*(cam_array(dtlb->nsets, va_size - (int)logtwo((double)dtlb->bsize),1,1,1) + simple_array_power(dtlb->nsets,tagsize,1,1,cache));
- tagsize = va_size - ((int)logtwo(cache_il1->nsets) + (int)logtwo(cache_il1->bsize));
- */
- /*
- predeclength = itlb->nsets * (RegCellHeight + WordlineSpacing);
- wordlinelength = logtwo((double)itlb->bsize) * (RegCellWidth + BitlineSpacing);
- bitlinelength = itlb->nsets * (RegCellHeight + WordlineSpacing);
- if(verbose)
- fprintf(stderr,"itlb predict power stats\n");
- power->itlb = cam_array(itlb->nsets, va_size - (int)logtwo((double)itlb->bsize),1,1,1) + simple_array_power(itlb->nsets,tagsize,1,1,cache);
- cache=1;
- time_parameters.cache_size = cache_il1->nsets * cache_il1->bsize * cache_il1->assoc; // C
- time_parameters.block_size = cache_il1->bsize; // B
- time_parameters.associativity = cache_il1->assoc; // A
- time_parameters.number_of_sets = cache_il1->nsets; // C/(B*A)
- calculate_time(&time_result,&time_parameters);
- output_data(&time_result,&time_parameters);
- ndwl=time_result.best_Ndwl;
- ndbl=time_result.best_Ndbl;
- nspd=time_result.best_Nspd;
- ntwl=time_result.best_Ntwl;
- ntbl=time_result.best_Ntbl;
- ntspd=time_result.best_Ntspd;
- c = time_parameters.cache_size;
- b = time_parameters.block_size;
- a = time_parameters.associativity;
- rowsb = c/(b*a*ndbl*nspd);
- colsb = 8*b*a*nspd/ndwl;
- tagsize = va_size - ((int)logtwo(cache_il1->nsets) + (int)logtwo(cache_il1->bsize));
- trowsb = c/(b*a*ntbl*ntspd);
- tcolsb = a * (tagsize + 1 + 6) * ntspd/ntwl;
- if(verbose) {
- fprintf(stderr,"%d KB %d-way cache (%d-byte block size):\n",c,a,b);
- fprintf(stderr,"ndwl == %d, ndbl == %d, nspd == %d\n",ndwl,ndbl,nspd);
- fprintf(stderr,"%d sets of %d rows x %d cols\n",ndwl*ndbl,rowsb,colsb);
- fprintf(stderr,"tagsize == %d\n",tagsize);
- }
- predeclength = rowsb * (RegCellHeight + WordlineSpacing);
- wordlinelength = colsb * (RegCellWidth + BitlineSpacing);
- bitlinelength = rowsb * (RegCellHeight + WordlineSpacing);
- if(verbose)
- fprintf(stderr,"icache power stats\n");
- power->icache_decoder = ndwl*ndbl*array_decoder_power(rowsb,colsb,predeclength,1,1,cache);
- power->icache_wordline = ndwl*ndbl*array_wordline_power(rowsb,colsb,wordlinelength,1,1,cache);
- power->icache_bitline = ndwl*ndbl*array_bitline_power(rowsb,colsb,bitlinelength,1,1,cache);
- power->icache_senseamp = ndwl*ndbl*senseamp_power(colsb);
- power->icache_tagarray = ntwl*ntbl*(simple_array_power(trowsb,tcolsb,1,1,cache));
- power->icache_power = power->icache_decoder + power->icache_wordline + power->icache_bitline + power->icache_senseamp + power->icache_tagarray;
- time_parameters.cache_size = cache_dl1->nsets * cache_dl1->bsize * cache_dl1->assoc; // C
- time_parameters.block_size = cache_dl1->bsize; // B
- time_parameters.associativity = cache_dl1->assoc; // A
- time_parameters.number_of_sets = cache_dl1->nsets; // C/(B*A)
- calculate_time(&time_result,&time_parameters);
- output_data(&time_result,&time_parameters);
- ndwl=time_result.best_Ndwl;
- ndbl=time_result.best_Ndbl;
- nspd=time_result.best_Nspd;
- ntwl=time_result.best_Ntwl;
- ntbl=time_result.best_Ntbl;
- ntspd=time_result.best_Ntspd;
- c = time_parameters.cache_size;
- b = time_parameters.block_size;
- a = time_parameters.associativity;
- cache=1;
- rowsb = c/(b*a*ndbl*nspd);
- colsb = 8*b*a*nspd/ndwl;
- tagsize = va_size - ((int)logtwo(cache_dl1->nsets) + (int)logtwo(cache_dl1->bsize));
- trowsb = c/(b*a*ntbl*ntspd);
- tcolsb = a * (tagsize + 1 + 6) * ntspd/ntwl;
- if(verbose) {
- fprintf(stderr,"%d KB %d-way cache (%d-byte block size):\n",c,a,b);
- fprintf(stderr,"ndwl == %d, ndbl == %d, nspd == %d\n",ndwl,ndbl,nspd);
- fprintf(stderr,"%d sets of %d rows x %d cols\n",ndwl*ndbl,rowsb,colsb);
- fprintf(stderr,"tagsize == %d\n",tagsize);
- fprintf(stderr,"\nntwl == %d, ntbl == %d, ntspd == %d\n",ntwl,ntbl,ntspd);
- fprintf(stderr,"%d sets of %d rows x %d cols\n",ntwl*ntbl,trowsb,tcolsb);
- }
- predeclength = rowsb * (RegCellHeight + WordlineSpacing);
- wordlinelength = colsb * (RegCellWidth + BitlineSpacing);
- bitlinelength = rowsb * (RegCellHeight + WordlineSpacing);
- if(verbose)
- fprintf(stderr,"dcache power stats\n");
- power->dcache_decoder = profile.res_memport*ndwl*ndbl*array_decoder_power(rowsb,colsb,predeclength,1,1,cache);
- power->dcache_wordline = profile.res_memport*ndwl*ndbl*array_wordline_power(rowsb,colsb,wordlinelength,1,1,cache);
- power->dcache_bitline = profile.res_memport*ndwl*ndbl*array_bitline_power(rowsb,colsb,bitlinelength,1,1,cache);
- power->dcache_senseamp = profile.res_memport*ndwl*ndbl*senseamp_power(colsb);
- power->dcache_tagarray = profile.res_memport*ntwl*ntbl*(simple_array_power(trowsb,tcolsb,1,1,cache));
- power->dcache_power = power->dcache_decoder + power->dcache_wordline + power->dcache_bitline + power->dcache_senseamp + power->dcache_tagarray;
- */
- //clockpower = total_clockpower(.018);
- clockpower = total_clockpower(.007, profile);
- power->clock_power = clockpower;
- /*
- if(verbose) {
- fprintf(stderr,"result bus power == %f\n",power->resultbus);
- fprintf(stderr,"global clock power == %f\n",clockpower);
- }
- time_parameters.cache_size = cache_dl2->nsets * cache_dl2->bsize * cache_dl2->assoc; // C
- time_parameters.block_size = cache_dl2->bsize; // B
- time_parameters.associativity = cache_dl2->assoc; // A
- time_parameters.number_of_sets = cache_dl2->nsets; // C/(B*A)
- calculate_time(&time_result,&time_parameters);
- output_data(&time_result,&time_parameters);
- ndwl=time_result.best_Ndwl;
- ndbl=time_result.best_Ndbl;
- nspd=time_result.best_Nspd;
- ntwl=time_result.best_Ntwl;
- ntbl=time_result.best_Ntbl;
- ntspd=time_result.best_Ntspd;
- c = time_parameters.cache_size;
- b = time_parameters.block_size;
- a = time_parameters.associativity;
- rowsb = c/(b*a*ndbl*nspd);
- colsb = 8*b*a*nspd/ndwl;
- tagsize = va_size - ((int)logtwo(cache_dl2->nsets) + (int)logtwo(cache_dl2->bsize));
- trowsb = c/(b*a*ntbl*ntspd);
- tcolsb = a * (tagsize + 1 + 6) * ntspd/ntwl;
- if(verbose) {
- fprintf(stderr,"%d KB %d-way cache (%d-byte block size):\n",c,a,b);
- fprintf(stderr,"ndwl == %d, ndbl == %d, nspd == %d\n",ndwl,ndbl,nspd);
- fprintf(stderr,"%d sets of %d rows x %d cols\n",ndwl*ndbl,rowsb,colsb);
- fprintf(stderr,"tagsize == %d\n",tagsize);
- }
- predeclength = rowsb * (RegCellHeight + WordlineSpacing);
- wordlinelength = colsb * (RegCellWidth + BitlineSpacing);
- bitlinelength = rowsb * (RegCellHeight + WordlineSpacing);
- if(verbose)
- fprintf(stderr,"dcache2 power stats\n");
- power->dcache2_decoder = array_decoder_power(rowsb,colsb,predeclength,1,1,cache);
- power->dcache2_wordline = array_wordline_power(rowsb,colsb,wordlinelength,1,1,cache);
- power->dcache2_bitline = array_bitline_power(rowsb,colsb,bitlinelength,1,1,cache);
- power->dcache2_senseamp = senseamp_power(colsb);
- power->dcache2_tagarray = simple_array_power(trowsb,tcolsb,1,1,cache);
- power->dcache2_power = power->dcache2_decoder + power->dcache2_wordline + power->dcache2_bitline + power->dcache2_senseamp + power->dcache2_tagarray;
- power->rat_decoder *= crossover_scaling;
- power->rat_wordline *= crossover_scaling;
- power->rat_bitline *= crossover_scaling;
- power->dcl_compare *= crossover_scaling;
- power->dcl_pencode *= crossover_scaling;
- power->inst_decoder_power *= crossover_scaling;
- */
- /* power->wakeup_tagdrive *= crossover_scaling;
- power->wakeup_tagmatch *= crossover_scaling;
- power->wakeup_ormatch *= crossover_scaling; */
- /* lym
- power->iq_wakeup_tagdrive *= crossover_scaling;
- power->iq_wakeup_tagmatch *= crossover_scaling;
- power->iq_wakeup_ormatch *= crossover_scaling;
- power->fpq_wakeup_tagdrive *= crossover_scaling;
- power->fpq_wakeup_tagmatch *= crossover_scaling;
- power->fpq_wakeup_ormatch *= crossover_scaling;
- lym */
- /* power->selection *= crossover_scaling; */
- /* lym
- power->iq_selection *= crossover_scaling;
- power->fpq_selection *= crossover_scaling;
- power->lsq_selection *= crossover_scaling;
- lym */
- power->regfile_decoder *= crossover_scaling;
- power->regfile_wordline *= crossover_scaling;
- power->regfile_bitline *= crossover_scaling;
- power->regfile_senseamp *= crossover_scaling;
- power->rs_decoder *= crossover_scaling;
- power->rs_wordline *= crossover_scaling;
- power->rs_bitline *= crossover_scaling;
- power->rs_senseamp *= crossover_scaling;
- power->lsq_wakeup_tagdrive *= crossover_scaling;
- power->lsq_wakeup_tagmatch *= crossover_scaling;
- power->lsq_rs_decoder *= crossover_scaling;
- power->lsq_rs_wordline *= crossover_scaling;
- power->lsq_rs_bitline *= crossover_scaling;
- power->lsq_rs_senseamp *= crossover_scaling;
- power->resultbus *= crossover_scaling;
- power->btb *= crossover_scaling;
- power->local_predict *= crossover_scaling;
- power->global_predict *= crossover_scaling;
- power->chooser *= crossover_scaling;
- /*
- power->dtlb *= crossover_scaling;
- power->itlb *= crossover_scaling;
- power->icache_decoder *= crossover_scaling;
- power->icache_wordline*= crossover_scaling;
- power->icache_bitline *= crossover_scaling;
- power->icache_senseamp*= crossover_scaling;
- power->icache_tagarray*= crossover_scaling;
- power->icache_power *= crossover_scaling;
- power->dcache_decoder *= crossover_scaling;
- power->dcache_wordline *= crossover_scaling;
- power->dcache_bitline *= crossover_scaling;
- power->dcache_senseamp *= crossover_scaling;
- power->dcache_tagarray *= crossover_scaling;
- power->dcache_power *= crossover_scaling;
- power->clock_power *= crossover_scaling;
- power->dcache2_decoder *= crossover_scaling;
- power->dcache2_wordline *= crossover_scaling;
- power->dcache2_bitline *= crossover_scaling;
- power->dcache2_senseamp *= crossover_scaling;
- power->dcache2_tagarray *= crossover_scaling;
- power->dcache2_power *= crossover_scaling;
- */
- power->total_power = power->local_predict + power->global_predict +
- power->chooser + power->btb +
- power->rat_decoder + power->rat_wordline +
- power->rat_bitline + power->rat_senseamp +
- power->dcl_compare + power->dcl_pencode +
- power->inst_decoder_power +
- /* power->wakeup_tagdrive + power->wakeup_tagmatch + */ /* lym */
- power->iq_wakeup_tagdrive + power->iq_wakeup_tagmatch +
- power->fpq_wakeup_tagdrive + power->fpq_wakeup_tagmatch +
- /* power->selection *//* lym */ power->iq_selection + power->fpq_selection
- + power->lsq_selection +
- power->regfile_decoder + power->regfile_wordline +
- power->regfile_bitline + power->regfile_senseamp +
- power->rs_decoder + power->rs_wordline +
- power->rs_bitline + power->rs_senseamp +
- power->lsq_wakeup_tagdrive + power->lsq_wakeup_tagmatch +
- power->lsq_rs_decoder + power->lsq_rs_wordline +
- power->lsq_rs_bitline + power->lsq_rs_senseamp +
- power->resultbus +
- power->clock_power +
- power->icache_power +
- power->itlb +
- power->dcache_power +
- power->dtlb +
- power->dcache2_power;
- power->total_power_nodcache2 =power->local_predict + power->global_predict +
- power->chooser + power->btb +
- power->rat_decoder + power->rat_wordline +
- power->rat_bitline + power->rat_senseamp +
- power->dcl_compare + power->dcl_pencode +
- power->inst_decoder_power +
- /* power->wakeup_tagdrive + power->wakeup_tagmatch + */ /* lym */
- power->iq_wakeup_tagdrive + power->iq_wakeup_tagmatch +
- power->fpq_wakeup_tagdrive + power->fpq_wakeup_tagmatch +
- /* power->selection */ /* lym */ power->iq_selection +
- power->fpq_selection +
- power->lsq_selection +
- power->regfile_decoder + power->regfile_wordline +
- power->regfile_bitline + power->regfile_senseamp +
- power->rs_decoder + power->rs_wordline +
- power->rs_bitline + power->rs_senseamp +
- power->lsq_wakeup_tagdrive + power->lsq_wakeup_tagmatch +
- power->lsq_rs_decoder + power->lsq_rs_wordline +
- power->lsq_rs_bitline + power->lsq_rs_senseamp +
- power->resultbus +
- power->clock_power +
- power->icache_power +
- power->itlb +
- power->dcache_power +
- power->dtlb +
- power->dcache2_power;
- power->bpred_power = power->btb + power->local_predict + power->global_predict + power->chooser + power->ras;
- power->rat_power = power->rat_decoder +
- power->rat_wordline + power->rat_bitline + power->rat_senseamp;
- power->dcl_power = power->dcl_compare + power->dcl_pencode;
- power->rename_power = power->rat_power +
- power->dcl_power +
- power->inst_decoder_power;
- /* power->wakeup_power = power->wakeup_tagdrive + power->wakeup_tagmatch +
- power->wakeup_ormatch; */ /* lym */
- power->iq_wakeup_power = power->iq_wakeup_tagdrive + power->iq_wakeup_tagmatch + power->iq_wakeup_ormatch;
- power->fpq_wakeup_power = power->fpq_wakeup_tagdrive + power->fpq_wakeup_tagmatch + power->fpq_wakeup_ormatch;
- power->rs_power = power->rs_decoder +
- power->rs_wordline + power->rs_bitline + power->rs_senseamp;
- power->rs_power_nobit = power->rs_decoder +
- power->rs_wordline + power->rs_senseamp;
- /* lym */
- power->window_power = power->iq_wakeup_power + power->fpq_wakeup_power + power->rs_power +
- /* power->selection */ /* lym */ power->iq_selection + power->fpq_selection + power->lsq_selection;
- /* lym */
- power->lsq_rs_power = power->lsq_rs_decoder +
- power->lsq_rs_wordline + power->lsq_rs_bitline +
- power->lsq_rs_senseamp;
- power->lsq_rs_power_nobit = power->lsq_rs_decoder +
- power->lsq_rs_wordline + power->lsq_rs_senseamp;
- power->lsq_wakeup_power = power->lsq_wakeup_tagdrive + power->lsq_wakeup_tagmatch;
- power->lsq_power = power->lsq_wakeup_power + power->lsq_rs_power;
- power->regfile_power = power->regfile_decoder +
- power->regfile_wordline + power->regfile_bitline +
- power->regfile_senseamp;
- power->regfile_power_nobit = power->regfile_decoder +
- power->regfile_wordline + power->regfile_senseamp;
- /* MCREG Power modeling */
- ireg2_power_per_reg = compute_ireg2_access_power(4.91) * 32; /* 32 bit data */
- ireg2_address_power_per_reg = compute_ireg2_access_power(4.91) * 7; /* 7 bit address: 80 regs */
- //=============================================================/
- /* scale powers from mJ to J */
- power->regfile_power *= 1e-3;
- power->ialu_power *= 1e-3;
- power->falu_power *= 1e-3;
- power->resultbus *= 1e-3;
- power->clock_power *= 1e-3;
- power->regfile_power_nobit *= 1e-3;
- power->regfile_bitline *= 1e-3;
- double power_scale = frequency_scaling * voltage_scaling * voltage_scaling;
- power->power_scale = power_scale;
- #ifdef STATIC_AF
- double regfile_power_leakage = power_scale *turnoff_factor*power->regfile_power;
- #else
- double regfile_power_leakage = power_scale *turnoff_factor*power->regfile_power;
- #endif
- #ifdef STATIC_AF
- double resultbus_power_leakage = power_scale * turnoff_factor*power->resultbus;
- #else
- double resultbus_power_leakage = power_scale * turnoff_factor*power->resultbus;
- #endif
- power->regfile_leakage = regfile_power_leakage;
- power->ialu_leakage = power_scale *turnoff_factor*power->ialu_power;
- power->falu_leakage = power_scale *turnoff_factor*power->falu_power;
- power->resultbus_leakage = resultbus_power_leakage;
- power->decode_leakage = power_scale*turnoff_factor*power->inst_decoder_power;
- double total_cycle_leakage_cc3=power->regfile_leakage
- +power->ialu_leakage
- +power->falu_leakage
- +power->resultbus_leakage
- +power->decode_leakage;
- double max_cycle_power = power_scale*power->regfile_power \
- + power_scale*power->ialu_power \
- + power_scale*power->falu_power \
- + power_scale*power->resultbus\
- + power_scale*power->inst_decoder_power;
- power->clock_leakage = power_scale * power->clock_power*(total_cycle_leakage_cc3/max_cycle_power);
- power->total_leakage = power->regfile_leakage
- +power->ialu_leakage
- +power->falu_leakage
- +power->resultbus_leakage
- +power->clock_leakage
- +power->decode_leakage;
- //dump_power_stats(power);
- }
- void clear_dtm_stats();
- /* FIXME: many stats (e.g. thermal emergencies and triggers) do not seem to be reset. */
- void clear_lots_of_stats()
- {
- /*
- int i = 0;
- FPQ_fcount = 0;
- FPQ_count = 0;
- IQ_fcount = 0;
- IQ_count = 0;
- LSQ_fcount = 0;
- LSQ_count = 0;
- RUU_fcount = 0;
- RUU_count = 0;
- IFQ_fcount = 0;
- IFQ_count = 0;
- shadow_sim_cycle = sim_cycle;
- shadow_sim_num_insn = sim_num_insn;
- wall_clock_time = 0;
- sim_total_insn = 0;
- sim_num_branches = 0;
- sim_total_loads = 0;
- sim_total_refs = 0;
- sim_num_branches = 0;
- sim_num_loads = 0;
- sim_num_refs = 0;
- for(i = 0; i < flp_adj->n_units; i++)
- overall_power[i] = 0;
- */
- total_rename_access=0;
- total_bpred_access=0;
- total_window_access=0;
- total_lsq_access=0;
- total_iq_access=0;
- total_fpq_access=0;
- total_regfile_access=0;
- total_icache_access=0;
- total_dcache_access=0;
- total_dcache2_access=0;
- total_alu_access=0;
- total_resultbus_access=0;
- max_rename_access = 0;
- max_bpred_access = 0;
- max_window_access = 0;
- max_lsq_access = 0;
- max_regfile_access = 0;
- max_icache_access = 0;
- max_dcache_access = 0;
- max_dcache2_access = 0;
- max_alu_access = 0;
- max_resultbus_access = 0;
- zc_cycles = 0;
- //clear_dtm_stats();
- /*
- // clear predictor stats
- if (pred)
- bpred_clear_stats(pred);
- // clear cache stats
- if (cache_il1 && (cache_il1 != cache_dl1 && cache_il1 != cache_dl2))
- cache_clear_stats(cache_il1);
- if (cache_il2 && (cache_il2 != cache_dl1 && cache_il2 != cache_dl2))
- cache_clear_stats(cache_il2);
- if (cache_dl1)
- cache_clear_stats(cache_dl1);
- if (cache_dl2)
- cache_clear_stats(cache_dl2);
- if (itlb)
- cache_clear_stats(itlb);
- if (dtlb)
- cache_clear_stats(dtlb);
- */
- }