PageRenderTime 27ms CodeModel.GetById 22ms RepoModel.GetById 1ms app.codeStats 0ms

/src/wattch/power.cc

https://bitbucket.org/musleh123/gem5_cetus
C++ | 2206 lines | 950 code | 316 blank | 940 comment | 40 complexity | 8726359bcbeabf7b0ab452c414ed8ef1 MD5 | raw file
Possible License(s): BSD-3-Clause, LGPL-2.1
  1. /* I included this copyright since we're using Cacti for some stuff */
  2. /*------------------------------------------------------------
  3. * Copyright 1994 Digital Equipment Corporation and Steve Wilton
  4. * All Rights Reserved
  5. *
  6. * Permission to use, copy, and modify this software and its documentation is
  7. * hereby granted only under the following terms and conditions. Both the
  8. * above copyright notice and this permission notice must appear in all copies
  9. * of the software, derivative works or modified versions, and any portions
  10. * thereof, and both notices must appear in supporting documentation.
  11. *
  12. * Users of this software agree to the terms and conditions set forth herein,
  13. * and hereby grant back to Digital a non-exclusive, unrestricted, royalty-
  14. * free right and license under any changes, enhancements or extensions
  15. * made to the core functions of the software, including but not limited to
  16. * those affording compatibility with other hardware or software
  17. * environments, but excluding applications which incorporate this software.
  18. * Users further agree to use their best efforts to return to Digital any
  19. * such changes, enhancements or extensions that they make and inform Digital
  20. * of noteworthy uses of this software. Correspondence should be provided
  21. * to Digital at:
  22. *
  23. * Director of Licensing
  24. * Western Research Laboratory
  25. * Digital Equipment Corporation
  26. * 100 Hamilton Avenue
  27. * Palo Alto, California 94301
  28. *
  29. * This software may be distributed (but not offered for sale or transferred
  30. * for compensation) to third parties, provided such third parties agree to
  31. * abide by the terms and conditions of this notice.
  32. *
  33. * THE SOFTWARE IS PROVIDED "AS IS" AND DIGITAL EQUIPMENT CORP. DISCLAIMS ALL
  34. * WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES
  35. * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL DIGITAL EQUIPMENT
  36. * CORPORATION BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
  37. * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
  38. * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
  39. * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
  40. * SOFTWARE.
  41. *------------------------------------------------------------*/
  42. #include <cassert>
  43. #include <cmath>
  44. #include <cstdio>
  45. #include <cstdlib>
  46. #include <string>
  47. #include "power.hh"
  48. using namespace std;
  49. /*
  50. #include "RC.h"
  51. #include "bpred.h"
  52. #include "cache.h"
  53. #include "dtm.h"
  54. #include "hotspot.h"
  55. #include "machine.h"
  56. #include "modes-opts.h" // lym
  57. #include "sim.h"
  58. */
// TODO: add real number
/* Clock frequency term used by the power-factor macros below; defined in
   another translation unit. */
extern long long Mhz;
/* Compile-time switch for the diagnostic fprintf calls guarded by
   `if(verbose)`; with the value `false` those prints never execute. */
#define verbose false
/* Frequency/voltage factors that capacitances are multiplied by to obtain
   power.  Vdd is not defined in this part of the file (presumably it comes
   from power.hh -- confirm).
   NOTE(review): each macro expands to an unparenthesised product, so the
   expansion binds term-by-term with whatever surrounds it (for example
   `a / Powerfactor` would divide by `(Mhz)` only and then multiply by
   Vdd*Vdd).  Parenthesise at the point of use when that matters. */
#define SensePowerfactor (Mhz)*(Vdd/2)*(Vdd/2)
#define Sense2Powerfactor (Mhz)*(2*.3+.1*Vdd)
#define Powerfactor (Mhz)*Vdd*Vdd
#define LowSwingPowerfactor (Mhz)*.2*.2
/* set scale for crossover (vdd->gnd) currents */
const double crossover_scaling = 1.2;
/* set non-ideal turnoff percentage */
const double turnoff_factor = 0.1;
/* freq/voltage scaling */
const double frequency_scaling = 1.0;
const double voltage_scaling = 1.0;
/* Scale derived from LSCALE (LSCALE itself is defined elsewhere). */
#define MSCALE (LSCALE * .624 / .2250)
/* Flag initialised to 0; it is not read or written in the visible part of
   this file. */
int thermal_warmup_done = 0;
/*----------------------------------------------------------------------*/
/* static power model results */
power_result_type power;
  78. int pow2(int x) {
  79. return((int)pow(2.0,(double)x));
  80. }
  81. double logfour(double x)
  82. {
  83. if (x<=0) fprintf(stderr,"%e\n",x);
  84. return( (double) (log(x)/log(4.0)) );
  85. }
  86. /* safer pop count to validate the fast algorithm */
  87. int pop_count_slow(qword_t bits)
  88. {
  89. int count = 0;
  90. qword_t tmpbits = bits;
  91. while (tmpbits) {
  92. if (tmpbits & 1) ++count;
  93. tmpbits >>= 1;
  94. }
  95. return count;
  96. }
/* fast pop count
 *
 * Counts the set bits of `bits` by summing neighbouring bit-fields in
 * parallel: fields of width 1 are added pairwise, then fields of width 2,
 * and so on, doubling the field width at every step up to 32.
 *
 * The helper macros are defined inside the function body; since they are
 * preprocessor macros and this function does not #undef them, they remain
 * defined for whatever follows in the translation unit.
 *
 * The final sum is held in a qword_t and converted to int on return.
 */
int pop_count(qword_t bits)
{
/* T: the unsigned type in which the masks are built */
#define T unsigned long long
/* ONES: a T with every bit set */
#define ONES ((T)(-1))
/* TWO(k): 2^k as a T */
#define TWO(k) ((T)1 << (k))
/* CYCL(k): all-ones divided by (1 + 2^(2^k)), i.e. a mask of alternating
   clear/set runs that are each 2^k bits wide */
#define CYCL(k) (ONES/(1 + (TWO(TWO(k)))))
/* BSUM(x,k): add each 2^k-bit field to its upper neighbour, then keep only
   the low field of every pair */
#define BSUM(x,k) ((x)+=(x) >> TWO(k), (x) &= CYCL(k))
    qword_t x = bits;
    /* the first two steps mask before adding, so a field sum cannot carry
       into the neighbouring field */
    x = (x & CYCL(0)) + ((x>>TWO(0)) & CYCL(0));
    x = (x & CYCL(1)) + ((x>>TWO(1)) & CYCL(1));
    /* from 4-bit fields upward the sums fit in the field, so add first and
       mask afterwards */
    BSUM(x,2);
    BSUM(x,3);
    BSUM(x,4);
    BSUM(x,5);
    return x;
}
/* Fixed widths, presumably in bits -- confirm against the code that uses
   them. */
const int opcode_length = 8;
const int inst_length = 32;
/* Register-tag widths; defined here without an initialiser and not assigned
   in the visible part of this file (presumably virtual / physical register
   widths -- confirm). */
int nvreg_width;
int npreg_width;
/* The declarations below refer to simulator configuration that is defined
   in other translation units. */
extern int bimod_config[];
extern struct cache_t *cache_dl1;
extern struct cache_t *cache_il1;
extern struct cache_t *cache_dl2;
extern struct cache_t *cache_il2;
extern struct cache_t *dtlb;
extern struct cache_t *itlb;
/* 2-level predictor config (<l1size> <l2size> <hist_size> <xor>) */
extern int twolev_config[];
/* combining predictor config (<meta_table_size> */
extern int comb_config[];
/* return address stack (RAS) size */
extern int ras_size;
/* BTB predictor config (<num_sets> <associativity>) */
extern int btb_config[];
/* Defined here without an initialiser; not written in the visible part of
   this file. */
double global_clockcap;
  134. /*
  135. static double rename_power=0;
  136. static double bpred_power=0;
  137. static double window_power=0;
  138. static double lsq_power=0;
  139. static double regfile_power=0;
  140. //static double icache_power=0;
  141. //static double dcache_power=0;
  142. //static double dcache2_power=0;
  143. static double alu_power=0;
  144. static double falu_power=0;
  145. static double resultbus_power=0;
  146. static double clock_power=0;
  147. static double rename_power_cc1=0;
  148. static double bpred_power_cc1=0;
  149. //static double window_power_cc1=0;
  150. static double iq_power_cc1=0;
  151. static double fpq_power_cc1=0;
  152. static double lsq_power_cc1=0;
  153. static double regfile_power_cc1=0;
  154. static double icache_power_cc1=0;
  155. static double dcache_power_cc1=0;
  156. //static double dcache2_power_cc1=0;
  157. static double alu_power_cc1=0;
  158. static double resultbus_power_cc1=0;
  159. static double clock_power_cc1=0;
  160. static double rename_power_cc2=0;
  161. static double bpred_power_cc2=0;
  162. //static double window_power_cc2=0;
  163. static double iq_power_cc2=0;
  164. static double fpq_power_cc2=0;
  165. static double lsq_power_cc2=0;
  166. static double regfile_power_cc2=0;
  167. static double icache_power_cc2=0;
  168. static double dcache_power_cc2=0;
  169. //static double dcache2_power_cc2=0;
  170. static double alu_power_cc2=0;
  171. static double resultbus_power_cc2=0;
  172. static double clock_power_cc2=0;
  173. */
  174. /*
  175. static double rename_power_cc3=0;
  176. static double bpred_power_cc3=0;
  177. //static double window_power_cc3=0;
  178. static double iq_power_cc3=0;
  179. static double fpq_power_cc3=0;
  180. static double lsq_power_cc3=0;
  181. static double icache_power_cc3=0;
  182. static double dcache_power_cc3=0;
  183. static double dcache2_power_cc3=0;
  184. static double regfile_power_cc3=0;
  185. static double alu_power_cc3=0;
  186. //static double int_alu_power_cc3=0;
  187. //static double fp_alu_power_cc3=0;
  188. static double resultbus_power_cc3=0;
  189. static double clock_power_cc3=0;
  190. static double total_cycle_power;
  191. */
/* Per-unit power values with external linkage, all starting at 0.  They are
   not updated in the visible part of this file (presumably written once per
   simulated cycle elsewhere -- confirm). */
double cycle_rename_power_cc3=0;
double cycle_bpred_power_cc3=0;
double cycle_bpreddir_power_cc3=0;
double cycle_window_power_cc3=0;
double cycle_lsq_power_cc3=0;
double cycle_regfile_power_cc3=0;
double cycle_icache_power_cc3=0;
double cycle_dcache_power_cc3=0;
double cycle_dcache2_power_cc3=0;
double cycle_alu_power_cc3=0;
double cycle_falu_power_cc3=0;
double cycle_resultbus_power_cc3=0;
double cycle_clock_power_cc3=0;
/* File-local running access totals, one per modelled unit, starting at 0. */
static counter_t total_rename_access=0;
static counter_t total_bpred_access=0;
static counter_t total_window_access=0;
static counter_t total_lsq_access=0;
static counter_t total_iq_access = 0;
static counter_t total_fpq_access = 0;
static counter_t total_regfile_access=0;
static counter_t total_icache_access=0;
static counter_t total_dcache_access=0;
static counter_t total_dcache2_access=0;
static counter_t total_alu_access=0;
static counter_t total_resultbus_access=0;
/* File-local maxima, one per modelled unit.  No explicit initialiser: as
   objects with static storage duration they start out zero. */
static counter_t max_rename_access;
static counter_t max_bpred_access;
static counter_t max_window_access;
static counter_t max_lsq_access;
static counter_t max_regfile_access;
static counter_t max_icache_access;
static counter_t max_dcache_access;
static counter_t max_dcache2_access;
static counter_t max_alu_access;
static counter_t max_resultbus_access;
/* NOTE(review): purpose not visible from this part of the file (the name
   suggests a count of cycles of some kind) -- confirm. */
static counter_t zc_cycles;
/* Miscellaneous counters and power values with external linkage, all
   starting at 0; not touched in the visible part of this file. */
int reg_counter = 0;
int int_window_counter = 0;
int fp_window_counter = 0;
double fp_reg_power = 0;
double rp_rename_power = 0;
counter_t shadow_sim_cycle = 0;
counter_t shadow_sim_num_insn = 0;
/* MCREG Power modeling */
// static double ireg2_access_power, ireg2_address_power;
static double ireg2_power_per_reg, ireg2_address_power_per_reg;
  238. /*
  239. static double total_cycle_power_cc3=0.0;
  240. static double total_power_pipeline=0.0;
  241. static double total_power_cache=0.0;
  242. static double total_power_slave_l1cache=0.0;
  243. static double total_power_master_l1cache=0.0;
  244. static double total_power_l2cache=0.0;
  245. static double total_leakage_l1cache=0.0;
  246. static double total_leakage_l2cache=0.0;
  247. static double total_power_stream=0.0;
  248. static double total_leakage_stream=0.0;
  249. static double total_power_spill=0.0;
  250. static double total_power_interconn=0.0;
  251. static double total_power_bus=0.0;
  252. static double total_power_balancer=0.0;
  253. double total_power_mem = 0.0;
  254. double total_power_memspill = 0.0;
  255. static double cycle_leakage_pipeline=0.0;
  256. static double cycle_leakage_l1cache=0.0;
  257. static double cycle_leakage_l2cache=0.0;
  258. static double cycle_leakage_stream=0.0;
  259. static double cycle_leakage_spill = 0.0;
  260. static double cycle_leakage_interconn=0.0;
  261. //double strmbuf_idle_leakage=0.0, cache_idle_leakage=0.0, processor_idle_leakage=0.0, spillbuf_idle_leakage=0.0;
  262. double strmbuf_power_R[4], strmbuf_power_W[4], spillbuf_power_R[4], spillbuf_power_W[4];
  263. double L1cache_power_R, L1cache_power_W, L2cache_power_R, L2cache_power_W;
  264. double strmbuf_leakage=100.0, spillbuf_leakage=100.0, l1cache_leakage=100.0, l2cache_leakage=100.0;
  265. double balancer_power_R;
  266. double balancer_power_W;
  267. double balancer_leakage;
  268. extern double packet_power_read;
  269. extern double packet_power_write;
  270. extern double link_power_bit;
  271. extern int nStrmbuf;
  272. extern int nSpillbuf;
  273. extern int nProcs;
  274. extern int nL1cache;
  275. extern int nL2cache;
  276. extern double mem_power;
  277. double SB_area=0.0;
  278. double L1_area=0.0, L2_area=0.0;
  279. double balancer_area=0.0;
  280. */
  281. double
  282. compute_ireg2_access_power(double wirelength)
  283. {
  284. /* wirelength argument is in mm */
  285. double Cwire, Pwire;
  286. /* The power dissipated in wires is:
  287. * Pwire = 0.5 * Cwire * V * V * f
  288. * Cwire = capacitance of wire = Cmetal (F/um) * wirelength (mm) */
  289. Cwire = Cmetal * wirelength * 1000;
  290. Pwire = 0.5 * Cwire * Powerfactor;
  291. return Pwire;
  292. }
  293. /* compute bitline activity factors which we use to scale bitline power
  294. Here it is very important whether we assume 0's or 1's are
  295. responsible for dissipating power in pre-charged stuctures. (since
  296. most of the bits are 0's, we assume the design is power-efficient
  297. enough to allow 0's to _not_ discharge
  298. */
  299. double compute_af(counter_t num_pop_count_cycle,counter_t total_pop_count_cycle,int pop_width) {
  300. double avg_pop_count;
  301. double af,af_b;
  302. if(num_pop_count_cycle)
  303. avg_pop_count = (double)total_pop_count_cycle / (double)num_pop_count_cycle;
  304. else
  305. avg_pop_count = 0;
  306. af = avg_pop_count / (double)pop_width;
  307. af_b = 1.0 - af;
  308. /* printf("af == %f%%, af_b == %f%%, total_pop == %d, num_pop == %d\n",100*af,100*af_b,total_pop_count_cycle,num_pop_count_cycle); */
  309. return(af_b);
  310. }
  311. /*
  312. void write_power_stats(char* output)
  313. {
  314. FILE* fp=NULL;
  315. if(output){
  316. fp = fopen(output, "a");
  317. }
  318. if(fp==NULL)
  319. fp = stdout;
  320. total_power_cache = total_power_slave_l1cache \
  321. + total_power_master_l1cache \
  322. +total_power_l2cache \
  323. +total_leakage_l1cache \
  324. +total_leakage_l2cache;
  325. double total_chip_power = total_power_cache
  326. +total_power_interconn
  327. +total_power_bus
  328. +total_power_pipeline
  329. +total_power_stream
  330. +total_leakage_stream
  331. +total_power_spill
  332. +total_power_balancer;
  333. double total_power = total_chip_power \
  334. +total_power_mem+total_power_memspill;
  335. fprintf(fp, "\t");
  336. string delimiter = " \t";
  337. string description;
  338. string format;
  339. //=== cycles ===
  340. format = " \t%Lu";
  341. unsigned long long execCycles;
  342. if (flag_RecordStats)
  343. execCycles = (unsigned long long)(recordedTick+curTick-recordStartTick)/CLOCK_TICKS;
  344. else
  345. execCycles = (unsigned long long)(recordedTick)/CLOCK_TICKS;
  346. printNameFormat("cycles", execCycles);
  347. printNameFormat("tick", (unsigned long long) curTick/CLOCK_TICKS);
  348. //=== counters ===
  349. format=" \t%Ld";
  350. printNameFormat("#strmbuf_read", total_strmbuf_read);
  351. printNameFormat("#strm_cpu_readmiss", total_cpu_read_stream_misses);
  352. printNameFormat("#strm_resp_net", total_strmbuf_read_net);
  353. printNameFormat("#strm_fwdnet_latefills", total_fwd_net_latefills);
  354. printNameFormat("#strm_resp_spill", total_strmbuf_read_spill);
  355. printNameFormat("#strm_fwdspill_latefills", total_fwd_spill_latefills);
  356. printNameFormat("#strmbuf_write", total_strmbuf_write);
  357. printNameFormat("#strmbuf_spills", total_strmbuf_writespill);
  358. printNameFormat("#strmbuf_spills_by_fwd", total_strmbuf_fwdspill);
  359. printNameFormat("#fwd_from_net", total_fwd_net);
  360. printNameFormat("#fwd_from_spill", total_fwd_spill);
  361. printNameFormat("#fwd_from_mem", total_fwd_mem);
  362. printNameFormat("#packets", total_packets);
  363. //printNameFormat("#spills", total_spills);
  364. printNameFormat("#spill_writes", total_spill_writes);
  365. printNameFormat("#spill_reads", total_spill_reads);
  366. printNameFormat("#spill_readmisses", total_spill_readmisses);
  367. printNameFormat("#spill_mem", total_mem_spills);
  368. printNameFormat("#mem_strm_reads", total_mem_strmreads);
  369. printNameFormat("#mem_strm_writes", total_mem_strmwrites);
  370. printNameFormat("#max_spill_memlines", max_mem_spilledlines);
  371. printNameFormat("#L1slave_I$_Wr", total_l1slave_Iwrite);
  372. printNameFormat("#L1slave_I$_Wmis", total_l1slave_Wr_Imisses);
  373. printNameFormat("#L1slave_I$_Rd", total_l1slave_Iread);
  374. printNameFormat("#L1slave_I$_Rmis", total_l1slave_Rd_Imisses);
  375. printNameFormat("#L1slave_D$_Wr", total_l1slave_Dwrite);
  376. printNameFormat("#L1slave_D$_Wmis", total_l1slave_Wr_Dmisses);
  377. printNameFormat("#L1slave_D$_Rd", total_l1slave_Dread);
  378. printNameFormat("#L1slave_D$_Rmis", total_l1slave_Rd_Dmisses);
  379. printNameFormat("#L1master_I$_Wr", total_l1master_Iwrite);
  380. printNameFormat("#L1master_I$_Wmis", total_l1master_Wr_Imisses);
  381. printNameFormat("#L1master_I$_Rd", total_l1master_Iread);
  382. printNameFormat("#L1master_I$_Rmis", total_l1master_Rd_Imisses);
  383. printNameFormat("#L1master_D$_Wr", total_l1master_Dwrite);
  384. printNameFormat("#L1master_D$_Wmis", total_l1master_Wr_Dmisses);
  385. printNameFormat("#L1master_D$_Rd", total_l1master_Dread);
  386. printNameFormat("#L1master_D$_Rmis", total_l1master_Rd_Dmisses);
  387. printNameFormat("#L2_Rd", total_l2cache_read);
  388. printNameFormat("#L2_Rdmis", total_l2_Rd_misses);
  389. printNameFormat("#L2_RdEx", total_l2cache_readEx);
  390. printNameFormat("#L2_RdExmis", total_l2_RdEx_misses);
  391. printNameFormat("#L2_Wr", total_l2cache_write);
  392. printNameFormat("#L2_Wrmis", total_l2_Wr_misses);
  393. printNameFormat("#ins_slave_load", total_slave_memread_ins);
  394. printNameFormat("#ins_slave_store", total_slave_memwrite_ins);
  395. printNameFormat("#ins_xflow_read", total_slave_flowread_ins);
  396. printNameFormat("#ins_xflow_write", total_slave_flowwrite_ins);
  397. // === cpu status fractions ===
  398. if(execCycles){
  399. format="\t%g";
  400. for (int n=0; n<nProcs; n++){
  401. double total_fraction=0.0;
  402. for(int s=0; s<NumCpuStatus; s++){
  403. Tick cycles = cpusStatusPeriod[s][n]/CLOCK_TICKS;
  404. //if(cycles==0)
  405. //continue;
  406. double fraction = (double)cycles/(double)execCycles;
  407. total_fraction += fraction;
  408. fprintf(fp, "cpu%d_%s\t%g\t", n, cpusStatusNames[s].c_str(), fraction);
  409. }
  410. assert(total_fraction>0.99 && total_fraction < 1.1);
  411. }
  412. }
  413. //=== power stats ===
  414. format=" \t%g";
  415. printNameFormat("pipeline_energy", total_power_pipeline);
  416. printNameFormat("network_energy", total_power_interconn);
  417. printNameFormat("bus_energy", total_power_bus);
  418. printNameFormat("cache_slave_L1_active_energy", total_power_slave_l1cache);
  419. printNameFormat("cache_master_L1_active_energy",total_power_master_l1cache);
  420. printNameFormat("cache_L2_active_energy", total_power_l2cache);
  421. printNameFormat("cache_L1_leakage", total_leakage_l1cache);
  422. printNameFormat("cache_L2_leakage", total_leakage_l2cache);
  423. if(!EnableTmprL2asSRF){
  424. printNameFormat("strmbuf_active_energy", total_power_stream);
  425. printNameFormat("strmbuf_leakage", total_leakage_stream);
  426. }else{
  427. printNameFormat("strmbuf_active_energy", 0.0);
  428. printNameFormat("strmbuf_leakage", 0.0);
  429. }
  430. printNameFormat("spillbuf_energy", total_power_spill);
  431. printNameFormat("balancer_energy", total_power_balancer);
  432. printNameFormat("memory_energy", total_power_mem);
  433. printNameFormat("memory_spill_energy", total_power_memspill);
  434. printNameFormat("total_on_chip_energy", total_chip_power);
  435. printNameFormat("total_energy", total_power);
  436. // === area ===
  437. format = " \t%g";
  438. if(!EnableTmprL2asSRF){
  439. printNameFormat("strmbuf_area", SB_area*nStrmbuf);
  440. }else{
  441. printNameFormat("strmbuf_area", 0.0);
  442. }
  443. printNameFormat("L1_area", L1_area*nL1cache);
  444. printNameFormat("L2_area", L2_area*nL2cache);
  445. printNameFormat("balancer_area", balancer_area);
  446. fprintf(fp, "\n");
  447. if (fp!=stdout)
  448. fclose(fp);
  449. }
  450. */
  451. /* this routine takes the number of rows and cols of an array structure
  452. and attemps to make it make it more of a reasonable circuit structure
  453. by trying to make the number of rows and cols as close as possible.
  454. (scaling both by factors of 2 in opposite directions). it returns
  455. a scale factor which is the amount that the rows should be divided
  456. by and the columns should be multiplied by.
  457. */
  458. int squarify(int rows, int cols)
  459. {
  460. int scale_factor = 1;
  461. if(rows == cols)
  462. return 1;
  463. /*
  464. printf("init rows == %d\n",rows);
  465. printf("init cols == %d\n",cols);
  466. */
  467. while(rows > cols) {
  468. rows = rows/2;
  469. cols = cols*2;
  470. /*
  471. printf("rows == %d\n",rows);
  472. printf("cols == %d\n",cols);
  473. printf("scale_factor == %d (2^ == %d)\n\n",scale_factor,(int)pow(2.0,(double)scale_factor));
  474. */
  475. if (rows/2 <= cols)
  476. return((int)pow(2.0,(double)scale_factor));
  477. scale_factor++;
  478. }
  479. return 1;
  480. }
  481. /* could improve squarify to work when rows < cols */
  482. double squarify_new(int rows, int cols)
  483. {
  484. double scale_factor = 0.0;
  485. if(rows==cols)
  486. return(pow(2.0,scale_factor));
  487. while(rows > cols) {
  488. rows = rows/2;
  489. cols = cols*2;
  490. if (rows <= cols)
  491. return(pow(2.0,scale_factor));
  492. scale_factor++;
  493. }
  494. while(cols > rows) {
  495. rows = rows*2;
  496. cols = cols/2;
  497. if (cols <= rows)
  498. return(pow(2.0,scale_factor));
  499. scale_factor--;
  500. }
  501. return 1;
  502. }
/*
 * Print a human-readable breakdown of the power figures held in *power,
 * preceded by a few processor parameters taken from profile, to stderr.
 *
 * Per-unit totals are summed from the individual fields of *power; each
 * unit is reported as an absolute value and as a percentage of total_power.
 * total_power does not include ambient_power, which is only added to the
 * "Total Power Consumption" line.
 *
 * NOTE(review): the percentages divide by total_power without checking for
 * zero.
 */
void dump_power_stats(power_result_type *power, const CpuProfile& profile)
{
    double total_power;
    double bpred_power;
    double rename_power;
    double rat_power;
    double dcl_power;
    double lsq_power;
    double window_power;
    double wakeup_power;
    double rs_power;
    double lsq_wakeup_power;
    double lsq_rs_power;
    double regfile_power;
    double reorder_power;
    double icache_power;
    double dcache_power;
    double dcache2_power;
    double dtlb_power;
    double itlb_power;
    /* constant added on top of the modelled total when it is printed */
    double ambient_power = 2.0;
    icache_power = power->icache_power;
    dcache_power = power->dcache_power;
    dcache2_power = power->dcache2_power;
    itlb_power = power->itlb;
    dtlb_power = power->dtlb;
    bpred_power = power->btb + power->local_predict + power->global_predict +
        power->chooser + power->ras;
    /* NOTE(review): rat_power and dcl_power are computed here but never read
       again in this function; rename_power below is built from the
       power->rat_power and power->dcl_power fields instead. */
    rat_power = power->rat_decoder +
        power->rat_wordline + power->rat_bitline + power->rat_senseamp;
    dcl_power = power->dcl_compare + power->dcl_pencode;
    rename_power = power->rat_power + power->dcl_power + power->inst_decoder_power;
    /* wakeup_power = power->wakeup_tagdrive + power->wakeup_tagmatch +
    power->wakeup_ormatch; */
    /* lym */
    wakeup_power = power->iq_wakeup_tagdrive + power->iq_wakeup_tagmatch +
        power->iq_wakeup_ormatch + power->fpq_wakeup_tagdrive + power->fpq_wakeup_tagmatch + power->fpq_wakeup_ormatch;
    /* lym */
    rs_power = power->rs_decoder +
        power->rs_wordline + power->rs_bitline + power->rs_senseamp;
    /* the window total also takes in the iq, fpq and lsq selection terms */
    window_power = wakeup_power + rs_power + /* power->selection *//* lym */
        + power->iq_selection + power->fpq_selection
        + power->lsq_selection;
    lsq_rs_power = power->lsq_rs_decoder +
        power->lsq_rs_wordline + power->lsq_rs_bitline + power->lsq_rs_senseamp;
    lsq_wakeup_power = power->lsq_wakeup_tagdrive +
        power->lsq_wakeup_tagmatch + power->lsq_wakeup_ormatch;
    lsq_power = lsq_wakeup_power + lsq_rs_power;
    /* NOTE(review): reorder_power is computed but neither printed nor added
       to total_power. */
    reorder_power = power->reorder_decoder +
        power->reorder_wordline + power->reorder_bitline +
        power->reorder_senseamp;
    regfile_power = power->regfile_decoder +
        power->regfile_wordline + power->regfile_bitline +
        power->regfile_senseamp;
    total_power = bpred_power + rename_power + window_power + regfile_power +
        power->resultbus + lsq_power +
        icache_power + dcache_power + dcache2_power +
        dtlb_power + itlb_power + power->clock_power + power->ialu_power +
        power->falu_power;
    fprintf(stderr,"\nProcessor Parameters:\n");
    fprintf(stderr,"Issue Width: %d\n",profile.ruu_issue_width);
    fprintf(stderr,"Window Size: %d\n",profile.RUU_size);
    fprintf(stderr,"Number of Virtual Registers: %d\n",MD_NUM_IREGS);
    /* the physical register count printed here is the same RUU_size value
       as the window size above */
    fprintf(stderr,"Number of Physical Registers: %d\n",profile.RUU_size);
    fprintf(stderr,"Datapath Width: %d\n",profile.data_width);
    fprintf(stderr,"Total Power Consumption: %g\n",total_power+ambient_power);
    fprintf(stderr,"Branch Predictor Power Consumption: %g (%.3g%%)\n",bpred_power,100*bpred_power/total_power);
    fprintf(stderr," branch target buffer power (W): %g\n",power->btb);
    fprintf(stderr," local predict power (W): %g\n",power->local_predict);
    fprintf(stderr," global predict power (W): %g\n",power->global_predict);
    fprintf(stderr," chooser power (W): %g\n",power->chooser);
    fprintf(stderr," RAS power (W): %g\n",power->ras);
    fprintf(stderr,"Rename Logic Power Consumption: %g (%.3g%%)\n",rename_power,100*rename_power/total_power);
    fprintf(stderr," Instruction Decode Power (W): %g\n",power->inst_decoder_power);
    fprintf(stderr," RAT decode_power (W): %g\n",power->rat_decoder);
    fprintf(stderr," RAT wordline_power (W): %g\n",power->rat_wordline);
    fprintf(stderr," RAT bitline_power (W): %g\n",power->rat_bitline);
    fprintf(stderr," DCL Comparators (W): %g\n",power->dcl_compare);
    fprintf(stderr,"Instruction Window Power Consumption: %g (%.3g%%)\n",window_power,100*window_power/total_power);
    /* fprintf(stderr," tagdrive (W): %g\n",power->wakeup_tagdrive);
    fprintf(stderr," tagmatch (W): %g\n",power->wakeup_tagmatch);
    fprintf(stderr," Selection Logic (W): %g\n",power->selection); */
    /* lym */
    fprintf(stderr," decode_power (W): %g\n",power->rs_decoder);
    fprintf(stderr," wordline_power (W): %g\n",power->rs_wordline);
    fprintf(stderr," bitline_power (W): %g\n",power->rs_bitline);
    fprintf(stderr,"Load/Store Queue Power Consumption: %g (%.3g%%)\n",lsq_power,100*lsq_power/total_power);
    fprintf(stderr," tagdrive (W): %g\n",power->lsq_wakeup_tagdrive);
    fprintf(stderr," tagmatch (W): %g\n",power->lsq_wakeup_tagmatch);
    fprintf(stderr," decode_power (W): %g\n",power->lsq_rs_decoder);
    fprintf(stderr," wordline_power (W): %g\n",power->lsq_rs_wordline);
    fprintf(stderr," bitline_power (W): %g\n",power->lsq_rs_bitline);
    fprintf(stderr,"Arch. Register File Power Consumption: %g (%.3g%%)\n",regfile_power,100*regfile_power/total_power);
    fprintf(stderr," decode_power (W): %g\n",power->regfile_decoder);
    fprintf(stderr," wordline_power (W): %g\n",power->regfile_wordline);
    fprintf(stderr," bitline_power (W): %g\n",power->regfile_bitline);
    fprintf(stderr,"Result Bus Power Consumption: %g (%.3g%%)\n",power->resultbus,100*power->resultbus/total_power);
    fprintf(stderr,"Total Clock Power: %g (%.3g%%)\n",power->clock_power,100*power->clock_power/total_power);
    fprintf(stderr,"Int ALU Power: %g (%.3g%%)\n",power->ialu_power,100*power->ialu_power/total_power);
    fprintf(stderr,"FP ALU Power: %g (%.3g%%)\n",power->falu_power,100*power->falu_power/total_power);
    fprintf(stderr,"Instruction Cache Power Consumption: %g (%.3g%%)\n",icache_power,100*icache_power/total_power);
    fprintf(stderr," decode_power (W): %g\n",power->icache_decoder);
    fprintf(stderr," wordline_power (W): %g\n",power->icache_wordline);
    fprintf(stderr," bitline_power (W): %g\n",power->icache_bitline);
    fprintf(stderr," senseamp_power (W): %g\n",power->icache_senseamp);
    fprintf(stderr," tagarray_power (W): %g\n",power->icache_tagarray);
    fprintf(stderr,"Itlb_power (W): %g (%.3g%%)\n",power->itlb,100*power->itlb/total_power);
    fprintf(stderr,"Data Cache Power Consumption: %g (%.3g%%)\n",dcache_power,100*dcache_power/total_power);
    fprintf(stderr," decode_power (W): %g\n",power->dcache_decoder);
    fprintf(stderr," wordline_power (W): %g\n",power->dcache_wordline);
    fprintf(stderr," bitline_power (W): %g\n",power->dcache_bitline);
    fprintf(stderr," senseamp_power (W): %g\n",power->dcache_senseamp);
    fprintf(stderr," tagarray_power (W): %g\n",power->dcache_tagarray);
    fprintf(stderr,"Dtlb_power (W): %g (%.3g%%)\n",power->dtlb,100*power->dtlb/total_power);
    fprintf(stderr,"Level 2 Cache Power Consumption: %g (%.3g%%)\n",dcache2_power,100*dcache2_power/total_power);
    fprintf(stderr," decode_power (W): %g\n",power->dcache2_decoder);
    fprintf(stderr," wordline_power (W): %g\n",power->dcache2_wordline);
    fprintf(stderr," bitline_power (W): %g\n",power->dcache2_bitline);
    fprintf(stderr," senseamp_power (W): %g\n",power->dcache2_senseamp);
    fprintf(stderr," tagarray_power (W): %g\n",power->dcache2_tagarray);
}
  624. /*======================================================================*/
  625. /*
  626. * This part of the code contains routines for each section as
  627. * described in the tech report. See the tech report for more details
  628. * and explanations */
  629. /*----------------------------------------------------------------------*/
  630. double driver_size(double driving_cap, double desiredrisetime) {
  631. double nsize, psize;
  632. double Rpdrive;
  633. Rpdrive = desiredrisetime/(driving_cap*log(VSINV)*-1.0);
  634. psize = restowidth(Rpdrive,PCH);
  635. nsize = restowidth(Rpdrive,NCH);
  636. if (psize > Wworddrivemax) {
  637. psize = Wworddrivemax;
  638. }
  639. if (psize < 4.0 * LSCALE)
  640. psize = 4.0 * LSCALE;
  641. return (psize);
  642. }
  643. /* Decoder delay: (see section 6.1 of tech report) */
  644. double array_decoder_power
  645. (
  646. int rows, int cols,
  647. double predeclength,
  648. int rports, int wports,
  649. int cache
  650. )
  651. {
  652. double Ctotal=0;
  653. double Ceq=0;
  654. int numstack;
  655. int decode_bits=0;
  656. int ports;
  657. double rowsb;
  658. /* read and write ports are the same here */
  659. ports = rports + wports;
  660. rowsb = (double)rows;
  661. /* number of input bits to be decoded */
  662. decode_bits=(int)ceil((logtwo(rowsb)));
  663. /* First stage: driving the decoders */
  664. /* This is the capacitance for driving one bit (and its complement).
  665. -There are #rowsb 3->8 decoders contributing gatecap.
  666. - 2.0 factor from 2 identical sets of drivers in parallel
  667. */
  668. Ceq = 2.0*(draincap(Wdecdrivep,PCH,1)+draincap(Wdecdriven,NCH,1)) +
  669. gatecap(Wdec3to8n+Wdec3to8p,10.0)*rowsb;
  670. /* There are ports * #decode_bits total */
  671. Ctotal+=ports*decode_bits*Ceq;
  672. if(verbose)
  673. fprintf(stderr,"Decoder -- Driving decoders == %g\n",.3*Ctotal*Powerfactor);
  674. /* second stage: driving a bunch of nor gates with a nand
  675. numstack is the size of the nor gates -- ie. a 7-128 decoder has
  676. 3-input NAND followed by 3-input NOR */
  677. numstack = (int)ceil((1.0/3.0)*logtwo(rows));
  678. if (numstack<=0) numstack = 1;
  679. if (numstack>5) numstack = 5;
  680. /* There are #rowsb NOR gates being driven*/
  681. Ceq = (3.0*draincap(Wdec3to8p,PCH,1) +draincap(Wdec3to8n,NCH,3) +
  682. gatecap(WdecNORn+WdecNORp,((numstack*40)+20.0)))*rowsb;
  683. Ctotal+=ports*Ceq;
  684. if(verbose)
  685. fprintf(stderr,"Decoder -- Driving nor w/ nand == %g\n",.3*ports*Ceq*Powerfactor);
  686. /* Final stage: driving an inverter with the nor
  687. (inverter preceding wordline driver) -- wordline driver is in the next section*/
  688. Ceq = (gatecap(Wdecinvn+Wdecinvp,20.0)+
  689. numstack*draincap(WdecNORn,NCH,1)+
  690. draincap(WdecNORp,PCH,numstack));
  691. if(verbose)
  692. fprintf(stderr,"Decoder -- Driving inverter w/ nor == %g\n",.3*ports*Ceq*Powerfactor);
  693. Ctotal+=ports*Ceq;
  694. /* assume Activity Factor == .3 */
  695. return(.3*Ctotal*Powerfactor);
  696. }
  697. double simple_array_decoder_power
  698. (
  699. int rows, int cols,
  700. int rports, int wports,
  701. int cache
  702. )
  703. {
  704. double predeclength=0.0;
  705. return(array_decoder_power(rows,cols,predeclength,rports,wports,cache));
  706. }
  707. double array_wordline_power
  708. (
  709. int rows, int cols,
  710. double wordlinelength,
  711. int rports, int wports,
  712. int cache
  713. )
  714. {
  715. double Ctotal=0;
  716. double Ceq=0;
  717. double Cline=0;
  718. double Cliner, Clinew=0;
  719. double desiredrisetime,psize,nsize;
  720. int ports;
  721. double colsb;
  722. ports = rports+wports;
  723. colsb = (double)cols;
  724. /* Calculate size of wordline drivers assuming rise time == Period / 8
  725. - estimate cap on line
  726. - compute min resistance to achieve this with RC
  727. - compute width needed to achieve this resistance */
  728. desiredrisetime = Period/16;
  729. Cline = (gatecappass(Wmemcellr,1.0))*colsb + wordlinelength*CM3metal;
  730. psize = driver_size(Cline,desiredrisetime);
  731. /* how do we want to do p-n ratioing? -- here we just assume the same ratio
  732. from an inverter pair */
  733. nsize = psize * Wdecinvn/Wdecinvp;
  734. if(verbose)
  735. fprintf(stderr,"Wordline Driver Sizes -- nsize == %f, psize == %f\n",nsize,psize);
  736. Ceq = draincap(Wdecinvn,NCH,1) + draincap(Wdecinvp,PCH,1) +
  737. gatecap(nsize+psize,20.0);
  738. Ctotal+=ports*Ceq;
  739. if(verbose)
  740. fprintf(stderr,"Wordline -- Inverter -> Driver == %g\n",ports*Ceq*Powerfactor);
  741. /* Compute caps of read wordline and write wordlines
  742. - wordline driver caps, given computed width from above
  743. - read wordlines have 1 nmos access tx, size ~4
  744. - write wordlines have 2 nmos access tx, size ~2
  745. - metal line cap
  746. */
  747. Cliner = (gatecappass(Wmemcellr,(BitWidth-2*Wmemcellr)/2.0))*colsb+
  748. wordlinelength*CM3metal+
  749. 2.0*(draincap(nsize,NCH,1) + draincap(psize,PCH,1));
  750. Clinew = (2.0*gatecappass(Wmemcellw,(BitWidth-2*Wmemcellw)/2.0))*colsb+
  751. wordlinelength*CM3metal+
  752. 2.0*(draincap(nsize,NCH,1) + draincap(psize,PCH,1));
  753. if(verbose) {
  754. fprintf(stderr,"Wordline -- Line == %g\n",1e12*Cline);
  755. fprintf(stderr,"Wordline -- Line -- access -- gatecap == %g\n",1e12*colsb*2*gatecappass(Wmemcella,(BitWidth-2*Wmemcella)/2.0));
  756. fprintf(stderr,"Wordline -- Line -- driver -- draincap == %g\n",1e12*draincap(nsize,NCH,1) + draincap(psize,PCH,1));
  757. fprintf(stderr,"Wordline -- Line -- metal == %g\n",1e12*wordlinelength*CM3metal);
  758. }
  759. Ctotal+=rports*Cliner+wports*Clinew;
  760. /* AF == 1 assuming a different wordline is charged each cycle, but only
  761. 1 wordline (per port) is actually used */
  762. return(Ctotal*Powerfactor);
  763. }
  764. double simple_array_wordline_power
  765. (
  766. int rows, int cols,
  767. int rports, int wports,
  768. int cache
  769. )
  770. {
  771. double wordlinelength;
  772. int ports = rports + wports;
  773. wordlinelength = cols * (RegCellWidth + 2 * ports * BitlineSpacing);
  774. return(array_wordline_power(rows,cols,wordlinelength,rports,wports,cache));
  775. }
  776. double array_bitline_power
  777. (
  778. int rows, int cols,
  779. double bitlinelength,
  780. int rports, int wports,
  781. int cache
  782. )
  783. {
  784. double Ctotal=0;
  785. double Ccolmux=0;
  786. double Cbitrowr=0;
  787. double Cbitroww=0;
  788. double Cprerow=0;
  789. double Cwritebitdrive=0;
  790. double Cpregate=0;
  791. double Cliner=0;
  792. double Clinew=0;
  793. int ports;
  794. double rowsb;
  795. double colsb;
  796. double desiredrisetime, Cline, psize, nsize;
  797. ports = rports + wports;
  798. rowsb = (double)rows;
  799. colsb = (double)cols;
  800. /* Draincaps of access tx's */
  801. Cbitrowr = draincap(Wmemcellr,NCH,1);
  802. Cbitroww = draincap(Wmemcellw,NCH,1);
  803. /* Cprerow -- precharge cap on the bitline
  804. -simple scheme to estimate size of pre-charge tx's in a similar fashion
  805. to wordline driver size estimation.
  806. -FIXME: it would be better to use precharge/keeper pairs, i've omitted this
  807. from this version because it couldn't autosize as easily.
  808. */
  809. desiredrisetime = Period/8;
  810. Cline = rowsb*Cbitrowr+CM2metal*bitlinelength;
  811. psize = driver_size(Cline,desiredrisetime);
  812. /* compensate for not having an nmos pre-charging */
  813. psize = psize + psize * Wdecinvn/Wdecinvp;
  814. if(verbose)
  815. printf("Cprerow auto == %g (psize == %g)\n",draincap(psize,PCH,1),psize);
  816. Cprerow = draincap(psize,PCH,1);
  817. /* Cpregate -- cap due to gatecap of precharge transistors -- tack this
  818. onto bitline cap, again this could have a keeper */
  819. Cpregate = 4.0*gatecap(psize,10.0);
  820. global_clockcap+=rports*cols*2.0*Cpregate;
  821. /* Cwritebitdrive -- write bitline drivers are used instead of the precharge
  822. stuff for write bitlines
  823. - 2 inverter drivers within each driver pair */
  824. Cline = rowsb*Cbitroww+CM2metal*bitlinelength;
  825. psize = driver_size(Cline,desiredrisetime);
  826. nsize = psize * Wdecinvn/Wdecinvp;
  827. Cwritebitdrive = 2.0*(draincap(psize,PCH,1)+draincap(nsize,NCH,1));
  828. /*
  829. reg files (cache==0)
  830. => single ended bitlines (1 bitline/col)
  831. => AFs from pop_count
  832. caches (cache ==1)
  833. => double-ended bitlines (2 bitlines/col)
  834. => AFs = .5 (since one of the two bitlines is always charging/discharging)
  835. */
  836. #ifdef STATIC_AF
  837. if (cache == 0) {
  838. /* compute the total line cap for read/write bitlines */
  839. Cliner = rowsb*Cbitrowr+CM2metal*bitlinelength+Cprerow;
  840. Clinew = rowsb*Cbitroww+CM2metal*bitlinelength+Cwritebitdrive;
  841. /* Bitline inverters at the end of the bitlines (replaced w/ sense amps
  842. in cache styles) */
  843. Ccolmux = gatecap(MSCALE*(29.9+7.8),0.0)+gatecap(MSCALE*(47.0+12.0),0.0);
  844. Ctotal+=(1.0-POPCOUNT_AF)*rports*cols*(Cliner+Ccolmux+2.0*Cpregate);
  845. Ctotal+=.3*wports*cols*(Clinew+Cwritebitdrive);
  846. }
  847. else {
  848. Cliner = rowsb*Cbitrowr+CM2metal*bitlinelength+Cprerow + draincap(Wbitmuxn,NCH,1);
  849. Clinew = rowsb*Cbitroww+CM2metal*bitlinelength+Cwritebitdrive;
  850. Ccolmux = (draincap(Wbitmuxn,NCH,1))+2.0*gatecap(WsenseQ1to4,10.0);
  851. Ctotal+=.5*rports*2.0*cols*(Cliner+Ccolmux+2.0*Cpregate);
  852. Ctotal+=.5*wports*2.0*cols*(Clinew+Cwritebitdrive);
  853. }
  854. #else
  855. if (cache == 0) {
  856. /* compute the total line cap for read/write bitlines */
  857. Cliner = rowsb*Cbitrowr+CM2metal*bitlinelength+Cprerow;
  858. Clinew = rowsb*Cbitroww+CM2metal*bitlinelength+Cwritebitdrive;
  859. /* Bitline inverters at the end of the bitlines (replaced w/ sense amps
  860. in cache styles) */
  861. Ccolmux = gatecap(MSCALE*(29.9+7.8),0.0)+gatecap(MSCALE*(47.0+12.0),0.0);
  862. Ctotal += rports*cols*(Cliner+Ccolmux+2.0*Cpregate);
  863. Ctotal += .3*wports*cols*(Clinew+Cwritebitdrive);
  864. }
  865. else {
  866. Cliner = rowsb*Cbitrowr+CM2metal*bitlinelength+Cprerow + draincap(Wbitmuxn,NCH,1);
  867. Clinew = rowsb*Cbitroww+CM2metal*bitlinelength+Cwritebitdrive;
  868. Ccolmux = (draincap(Wbitmuxn,NCH,1))+2.0*gatecap(WsenseQ1to4,10.0);
  869. Ctotal+=.5*rports*2.0*cols*(Cliner+Ccolmux+2.0*Cpregate);
  870. Ctotal+=.5*wports*2.0*cols*(Clinew+Cwritebitdrive);
  871. }
  872. #endif
  873. if(verbose) {
  874. fprintf(stderr,"Bitline -- Precharge == %g\n",1e12*Cpregate);
  875. fprintf(stderr,"Bitline -- Line == %g\n",1e12*(Cliner+Clinew));
  876. fprintf(stderr,"Bitline -- Line -- access draincap == %g\n",1e12*rowsb*Cbitrowr);
  877. fprintf(stderr,"Bitline -- Line -- precharge draincap == %g\n",1e12*Cprerow);
  878. fprintf(stderr,"Bitline -- Line -- metal == %g\n",1e12*bitlinelength*CM2metal);
  879. fprintf(stderr,"Bitline -- Colmux == %g\n",1e12*Ccolmux);
  880. fprintf(stderr,"\n");
  881. }
  882. if(cache==0)
  883. return(Ctotal*Powerfactor);
  884. else
  885. return(Ctotal*SensePowerfactor*.4);
  886. }
  887. double simple_array_bitline_power
  888. (
  889. int rows, int cols,
  890. int rports, int wports,
  891. int cache
  892. )
  893. {
  894. double bitlinelength;
  895. int ports = rports + wports;
  896. bitlinelength = rows * (RegCellHeight + ports * WordlineSpacing);
  897. return (array_bitline_power(rows,cols,bitlinelength,rports,wports,cache));
  898. }
  899. /* estimate senseamp power dissipation in cache structures (Zyuban's method) */
  900. double senseamp_power(int cols)
  901. {
  902. return((double)cols * Vdd/8 * .5e-3);
  903. }
  904. /* estimate comparator power consumption (this comparator is similar
  905. to the tag-match structure in a CAM */
  906. double compare_cap(int compare_bits)
  907. {
  908. double c1, c2;
  909. /* bottom part of comparator */
  910. c2 = (compare_bits)*(draincap(Wcompn,NCH,1)+draincap(Wcompn,NCH,2))+
  911. draincap(Wevalinvp,PCH,1) + draincap(Wevalinvn,NCH,1);
  912. /* top part of comparator */
  913. c1 = (compare_bits)*(draincap(Wcompn,NCH,1)+draincap(Wcompn,NCH,2)+
  914. draincap(Wcomppreequ,NCH,1)) +
  915. gatecap(WdecNORn,1.0)+
  916. gatecap(WdecNORp,3.0);
  917. return(c1 + c2);
  918. }
  919. /* power of depency check logic */
  920. double dcl_compare_power(int compare_bits, const CpuProfile& profile)
  921. {
  922. double Ctotal;
  923. int num_comparators;
  924. num_comparators = (profile.ruu_decode_width - 1) * (profile.ruu_decode_width);
  925. Ctotal = num_comparators * compare_cap(compare_bits);
  926. return(Ctotal*Powerfactor*AF);
  927. }
  928. double simple_array_power
  929. (
  930. int rows, int cols,
  931. int rports, int wports,
  932. int cache
  933. )
  934. {
  935. if(cache==0)
  936. return( simple_array_decoder_power(rows,cols,rports,wports,cache)+
  937. simple_array_wordline_power(rows,cols,rports,wports,cache)+
  938. simple_array_bitline_power(rows,cols,rports,wports,cache));
  939. else
  940. return( simple_array_decoder_power(rows,cols,rports,wports,cache)+
  941. simple_array_wordline_power(rows,cols,rports,wports,cache)+
  942. simple_array_bitline_power(rows,cols,rports,wports,cache)+
  943. senseamp_power(cols));
  944. }
  945. double cam_tagdrive
  946. (
  947. int rows, int cols,
  948. int rports, int wports
  949. )
  950. {
  951. double Ctotal, Ctlcap, Cblcap, Cwlcap;
  952. double taglinelength;
  953. double wordlinelength;
  954. double nsize, psize;
  955. int ports;
  956. Ctotal=0;
  957. ports = rports + wports;
  958. taglinelength = rows *
  959. (CamCellHeight + ports * MatchlineSpacing);
  960. wordlinelength = cols *
  961. (CamCellWidth + ports * TaglineSpacing);
  962. /* Compute tagline cap */
  963. Ctlcap = Cmetal * taglinelength +
  964. rows * gatecappass(Wcomparen2,2.0) +
  965. draincap(Wcompdrivern,NCH,1)+draincap(Wcompdriverp,PCH,1);
  966. /* Compute bitline cap (for writing new tags) */
  967. Cblcap = Cmetal * taglinelength +
  968. rows * draincap(Wmemcellr,NCH,2);
  969. /* autosize wordline driver */
  970. psize = driver_size(Cmetal * wordlinelength + 2 * cols * gatecap(Wmemcellr,2.0),Period/8);
  971. nsize = psize * Wdecinvn/Wdecinvp;
  972. /* Compute wordline cap (for writing new tags) */
  973. Cwlcap = Cmetal * wordlinelength +
  974. draincap(nsize,NCH,1)+draincap(psize,PCH,1) +
  975. 2 * cols * gatecap(Wmemcellr,2.0);
  976. Ctotal += (rports * cols * 2 * Ctlcap) +
  977. (wports * ((cols * 2 * Cblcap) + (rows * Cwlcap)));
  978. return(Ctotal*Powerfactor*AF);
  979. }
  980. double cam_tagmatch
  981. (
  982. int rows, int cols,
  983. int rports, int wports,
  984. int issue_width
  985. )
  986. {
  987. double Ctotal, Cmlcap;
  988. double matchlinelength;
  989. int ports;
  990. Ctotal=0;
  991. ports = rports + wports;
  992. matchlinelength = cols *
  993. (CamCellWidth + ports * TaglineSpacing);
  994. Cmlcap = 2 * cols * draincap(Wcomparen1,NCH,2) +
  995. Cmetal * matchlinelength + draincap(Wmatchpchg,NCH,1) +
  996. gatecap(Wmatchinvn+Wmatchinvp,10.0) +
  997. gatecap(Wmatchnandn+Wmatchnandp,10.0);
  998. Ctotal += rports * rows * Cmlcap;
  999. global_clockcap += rports * rows * gatecap(Wmatchpchg,5.0);
  1000. /* noring the nanded match lines */
  1001. if(issue_width >= 8) /* lym */
  1002. Ctotal += 2 * gatecap(Wmatchnorn+Wmatchnorp,10.0);
  1003. return(Ctotal*Powerfactor*AF);
  1004. }
  1005. double cam_array
  1006. (
  1007. int rows, int cols,
  1008. int rports, int wports,
  1009. int issuewidth
  1010. )
  1011. {
  1012. return(cam_tagdrive(rows,cols,rports,wports) +
  1013. cam_tagmatch(rows,cols,rports,wports,issuewidth)); /* lym */
  1014. }
  1015. double selection_power(int win_entries, int issue_width) /* lym */
  1016. {
  1017. double Ctotal, Cor, Cpencode;
  1018. int num_arbiter=1;
  1019. Ctotal=0;
  1020. while(win_entries > 4)
  1021. {
  1022. win_entries = (int)ceil((double)win_entries / 4.0);
  1023. num_arbiter += win_entries;
  1024. }
  1025. Cor = 4 * draincap(WSelORn,NCH,1) + draincap(WSelORprequ,PCH,1);
  1026. Cpencode = draincap(WSelPn,NCH,1) + draincap(WSelPp,PCH,1) +
  1027. 2*draincap(WSelPn,NCH,1) + draincap(WSelPp,PCH,2) +
  1028. 3*draincap(WSelPn,NCH,1) + draincap(WSelPp,PCH,3) +
  1029. 4*draincap(WSelPn,NCH,1) + draincap(WSelPp,PCH,4) +
  1030. 4*gatecap(WSelEnn+WSelEnp,20.0) +
  1031. 4*draincap(WSelEnn,NCH,1) + 4*draincap(WSelEnp,PCH,1);
  1032. Ctotal += issue_width * num_arbiter*(Cor+Cpencode); /* lym */
  1033. return(Ctotal*Powerfactor*AF);
  1034. }
  1035. /* very rough clock power estimates */
  1036. double total_clockpower(double die_length, const CpuProfile& profile)
  1037. {
  1038. double clocklinelength;
  1039. double Cline,Cline2,Ctotal;
  1040. double pipereg_clockcap=0;
  1041. double global_buffercap = 0;
  1042. double Clockpower;
  1043. double num_piperegs;
  1044. /* int npreg_width = (int)ceil(logtwo((double)profile.RUU_size)); */ /* lym */
  1045. /* Assume say 8 stages (kinda low now).
  1046. FIXME: this could be a lot better; user could input
  1047. number of pipestages, etc */
  1048. /* assume 8 pipe stages and try to estimate bits per pipe stage */
  1049. /* pipe stage 0/1 */
  1050. num_piperegs = profile.ruu_issue_width*inst_length + profile.data_width;
  1051. /* pipe stage 1/2 */
  1052. num_piperegs += profile.ruu_issue_width*(inst_length + 3 * profile.RUU_size);
  1053. /* pipe stage 2/3 */
  1054. num_piperegs += profile.ruu_issue_width*(inst_length + 3 * profile.RUU_size);
  1055. /* pipe stage 3/4 */
  1056. num_piperegs += profile.ruu_issue_width*(3 * npreg_width + pow2(opcode_length));
  1057. /* pipe stage 4/5 */
  1058. num_piperegs += profile.ruu_issue_width*(2*profile.data_width + pow2(opcode_length));
  1059. /* pipe stage 5/6 */
  1060. num_piperegs += profile.ruu_issue_width*(profile.data_width + pow2(opcode_length));
  1061. /* pipe stage 6/7 */
  1062. num_piperegs += profile.ruu_issue_width*(profile.data_width + pow2(opcode_length));
  1063. /* pipe stage 7/8 */
  1064. num_piperegs += profile.ruu_issue_width*(profile.data_width + pow2(opcode_length));
  1065. /* assume 50% extra in control signals (rule of thumb) */
  1066. num_piperegs = num_piperegs * 1.5;
  1067. pipereg_clockcap = num_piperegs * 4*gatecap(10.0,0);
  1068. /* estimate based on 3% of die being in clock metal */
  1069. Cline2 = Cmetal * (.03 * die_length * die_length/BitlineSpacing) * 1e6 * 1e6;
  1070. /* another estimate */
  1071. clocklinelength = die_length*(.5 + 4 * (.25 + 2*(.25) + 4 * (.125)));
  1072. Cline = 20 * Cmetal * (clocklinelength) * 1e6;
  1073. global_buffercap = 12*gatecap(1000.0,10.0)+16*gatecap(200,10.0)+16*8*2*gatecap(100.0,10.00) + 2*gatecap(.29*1e6,10.0);
  1074. /* global_clockcap is computed within each array structure for pre-charge tx's*/
  1075. Ctotal = Cline+global_clockcap+pipereg_clockcap+global_buffercap;
  1076. if(verbose)
  1077. fprintf(stderr,"num_piperegs == %f\n",num_piperegs);
  1078. /* add I_ADD Clockcap and F_ADD Clockcap */
  1079. Clockpower = Ctotal*Powerfactor + profile.res_ialu*I_ADD_CLOCK + profile.res_fpalu*F_ADD_CLOCK;
  1080. if(verbose) {
  1081. fprintf(stderr,"Global Clock Power: %g\n",Clockpower);
  1082. fprintf(stderr," Global Metal Lines (W): %g\n",Cline*Powerfactor);
  1083. fprintf(stderr," Global Metal Lines (3%%) (W): %g\n",Cline2*Powerfactor);
  1084. fprintf(stderr," Global Clock Buffers (W): %g\n",global_buffercap*Powerfactor);
  1085. fprintf(stderr," Global Clock Cap (Explicit) (W): %g\n",global_clockcap*Powerfactor+I_ADD_CLOCK+F_ADD_CLOCK);
  1086. fprintf(stderr," Global Clock Cap (Implicit) (W): %g\n",pipereg_clockcap*Powerfactor);
  1087. }
  1088. return(Clockpower);
  1089. }
  1090. /* very rough global clock power estimates */
  1091. double global_clockpower(double die_length)
  1092. {
  1093. double clocklinelength;
  1094. double Cline,Cline2,Ctotal;
  1095. double global_buffercap = 0;
  1096. Cline2 = Cmetal * (.03 * die_length * die_length/BitlineSpacing) * 1e6 * 1e6;
  1097. clocklinelength = die_length*(.5 + 4 * (.25 + 2*(.25) + 4 * (.125)));
  1098. Cline = 20 * Cmetal * (clocklinelength) * 1e6;
  1099. global_buffercap = 12*gatecap(1000.0,10.0)+16*gatecap(200,10.0)+16*8*2*gatecap(100.0,10.00) + 2*gatecap(.29*1e6,10.0);
  1100. Ctotal = Cline+global_buffercap;
  1101. if(verbose) {
  1102. fprintf(stderr,"Global Clock Power: %g\n",Ctotal*Powerfactor);
  1103. fprintf(stderr," Global Metal Lines (W): %g\n",Cline*Powerfactor);
  1104. fprintf(stderr," Global Metal Lines (3%%) (W): %g\n",Cline2*Powerfactor);
  1105. fprintf(stderr," Global Clock Buffers (W): %g\n",global_buffercap*Powerfactor);
  1106. }
  1107. return(Ctotal*Powerfactor);
  1108. }
  1109. double compute_resultbus_power(const CpuProfile& profile)
  1110. {
  1111. double Ctotal, Cline;
  1112. double regfile_height;
  1113. /* compute size of result bus tags */
  1114. /* int npreg_width = (int)ceil(logtwo((double)profile.RUU_size)); */ /* lym */
  1115. Ctotal=0;
  1116. /*Changed to account for alpha ev6 clustering. Right now the length will
  1117. be to the integer ruu*/
  1118. regfile_height = profile.RUU_size * (RegCellHeight +
  1119. WordlineSpacing * 10);
  1120. /* assume num alu's == ialu (FIXME: generate a more detailed result bus network model*/
  1121. Cline = Cmetal * (regfile_height + .5 * profile.res_ialu * 3200.0 * LSCALE);
  1122. /* or use result bus length measured from 21264 die photo */
  1123. /* Cline = Cmetal * 3.3*1000;*/
  1124. /* Assume profile.ruu_issue_width result busses -- power can be scaled linearly
  1125. for number of result busses (scale by writeback_access) */
  1126. Ctotal += 2.0 * (profile.data_width + npreg_width) * 6* Cline; /* changed for ev6 */
  1127. #ifdef STATIC_AF
  1128. return(Ctotal*Powerfactor*AF);
  1129. #else
  1130. return(Ctotal*Powerfactor);
  1131. #endif
  1132. }
  1133. void calculate_power(power_result_type *power, const CpuProfile& profile)
  1134. {
  1135. Mhz = profile.Mhz;
  1136. Vdd = profile.vdd;
  1137. double clockpower;
  1138. double predeclength, wordlinelength, bitlinelength;
  1139. int cache;
  1140. //int ndwl, ndbl, nspd, ntwl, ntbl, ntspd, c,b,a,cache, rowsb, colsb;
  1141. //int trowsb, tcolsb, tagsize;
  1142. //int va_size = 48;
  1143. /* int npreg_width = (int)ceil(logtwo((double)profile.RUU_size)); *//* lym */
  1144. /* these variables are needed to use Cacti to auto-size cache arrays
  1145. (for optimal delay) */
  1146. // time_result_type time_result;
  1147. //time_parameter_type time_parameters;
  1148. /* used to autosize other structures, like bpred tables */
  1149. //int scale_factor;
  1150. global_clockcap = 0;
  1151. cache=0;
  1152. #ifdef DYNAMIC_AF
  1153. //double window_af_b, lsq_af_b;
  1154. //window_af_b = compute_af(window_num_pop_count_cycle,window_total_pop_count_cycle,data_width);
  1155. //lsq_af_b = compute_af(lsq_num_pop_count_cycle,lsq_total_pop_count_cycle,data_width);
  1156. power->regfile_af_b =
  1157. compute_af( regfile_num_pop_count_cycle,
  1158. regfile_total_pop_count_cycle,
  1159. profile.data_width);
  1160. power->resultbus_af_b =
  1161. compute_af(resultbus_num_pop_count_cycle,
  1162. resultbus_total_pop_count_cycle,
  1163. profile.data_width);
  1164. #endif
  1165. /* FIXME: ALU power is a simple constant, it would be better
  1166. to include bit AFs and have different numbers for different
  1167. types of operations */
  1168. power->ialu_power = profile.res_ialu * I_ADD;
  1169. power->falu_power = profile.res_fpalu * F_ADD;
  1170. nvreg_width = (int)ceil(logtwo((double)MD_NUM_IREGS));
  1171. npreg_width = (int)ceil(logtwo((double)profile.RUU_size));
  1172. /* RAT has shadow bits stored in each cell, this makes the
  1173. cell size larger than normal array structures, so we must
  1174. compute it here */
  1175. predeclength = MD_NUM_IREGS *
  1176. (RatCellHeight + 3 * profile.ruu_decode_width * WordlineSpacing);
  1177. wordlinelength = npreg_width *
  1178. (RatCellWidth +
  1179. 6 * profile.ruu_decode_width * BitlineSpacing +
  1180. RatShiftRegWidth*RatNumShift);
  1181. bitlinelength = MD_NUM_IREGS * (RatCellHeight + 3 * profile.ruu_decode_width * WordlineSpacing);
  1182. if(verbose)
  1183. fprintf(stderr,"rat power stats\n");
  1184. /*changed to a 80 entry cam, since it says so in the 21264 micro paper*/
  1185. power->rat_decoder = cam_tagdrive(profile.RUU_size,
  1186. npreg_width,
  1187. profile.ruu_decode_width,
  1188. profile.ruu_decode_width);
  1189. power->rat_wordline = cam_tagmatch(profile.RUU_size,
  1190. npreg_width,
  1191. profile.ruu_decode_width,
  1192. profile.ruu_decode_width,
  1193. profile.ruu_decode_width);
  1194. power->rat_bitline = 0;
  1195. power->rat_senseamp = 0;
  1196. power->dcl_compare = dcl_compare_power(nvreg_width, profile);
  1197. power->dcl_pencode = 0;
  1198. power->inst_decoder_power = profile.ruu_decode_width * simple_array_decoder_power(opcode_length,1,1,1,cache);
  1199. /* power->wakeup_tagdrive =cam_tagdrive(profile.RUU_size,npreg_width,profile.ruu_issue_width,profile.ruu_issue_width);
  1200. power->wakeup_tagmatch =cam_tagmatch(profile.RUU_size,npreg_width,profile.ruu_issue_width,profile.ruu_issue_width);
  1201. power->wakeup_ormatch =0; */
  1202. /* lym
  1203. power->iq_wakeup_tagdrive =cam_tagdrive(IQ_size,npreg_width,iq_issue_width,iq_issue_width);
  1204. power->iq_wakeup_tagmatch =cam_tagmatch(IQ_size,npreg_width,iq_issue_width,iq_issue_width,iq_issue_width);
  1205. power->iq_wakeup_ormatch =0;
  1206. power->fpq_wakeup_tagdrive =cam_tagdrive(FPQ_size,npreg_width,fpq_issue_width,fpq_issue_width);
  1207. power->fpq_wakeup_tagmatch =cam_tagmatch(FPQ_size,npreg_width,fpq_issue_width,fpq_issue_width,fpq_issue_width);
  1208. power->fpq_wakeup_ormatch =0;
  1209. lym */
  1210. /* power->selection = selection_power(profile.RUU_size); */
  1211. /* lym
  1212. power->iq_selection = selection_power(IQ_size,iq_issue_width);
  1213. power->fpq_selection = selection_power(FPQ_size,fpq_issue_width);
  1214. power->lsq_selection = selection_power(profile.LSQ_size,lsq_issue_width);
  1215. lym */
  1216. /*special numbers for alpha 21264*/
  1217. /*I'm reducing issue width to 1/3 of the normal, since every cluster is just 2 wide
  1218. and then I'm just going to add the 2 integer clusters up, leaving the fp cluster aside for the
  1219. moment*/
  1220. predeclength = MD_NUM_IREGS * (RegCellHeight + 2 * profile.ruu_issue_width * WordlineSpacing);
  1221. wordlinelength = profile.data_width *
  1222. (RegCellWidth +
  1223. 4 * profile.ruu_issue_width * BitlineSpacing);
  1224. bitlinelength = MD_NUM_IREGS * (RegCellHeight + 2 * profile.ruu_issue_width* WordlineSpacing);
  1225. if(verbose)
  1226. fprintf(stderr,"regfile power stats\n");
  1227. power->regfile_decoder = array_decoder_power(MD_NUM_IREGS,profile.data_width,predeclength,
  1228. 4/3*profile.ruu_issue_width ,profile.ruu_issue_width*2/3,cache);
  1229. power->regfile_wordline = array_wordline_power(MD_NUM_IREGS,profile.data_width,wordlinelength,
  1230. 4/3*profile.ruu_issue_width,profile.ruu_issue_width*2/3 ,cache);
  1231. power->regfile_bitline = array_bitline_power(MD_NUM_IREGS,profile.data_width,bitlinelength,
  1232. 4/3*profile.ruu_issue_width,profile.ruu_issue_width*2/3,cache);
  1233. power->regfile_senseamp =0;
  1234. /*
  1235. predeclength = MD_NUM_IREGS * (RegCellHeight + 3 * ruu_issue_width * WordlineSpacing);
  1236. wordlinelength = profile.data_width *
  1237. (RegCellWidth +
  1238. 6 * ruu_issue_width * BitlineSpacing);
  1239. bitlinelength = MD_NUM_IREGS * (RegCellHeight + 3 * ruu_issue_width * WordlineSpacing);
  1240. if(verbose)
  1241. fprintf(stderr,"regfile power stats\n");
  1242. power->regfile_decoder = array_decoder_power(MD_NUM_IREGS,profile.data_width,predeclength,2*profile.ruu_issue_width,profile.ruu_issue_width,cache);
  1243. power->regfile_wordline = array_wordline_power(MD_NUM_IREGS,profile.data_width,wordlinelength,2*profile.ruu_issue_width,profile.ruu_issue_width,cache);
  1244. power->regfile_bitline = array_bitline_power(MD_NUM_IREGS,profile.data_width,bitlinelength,2*profile.ruu_issue_width,profile.ruu_issue_width,cache);
  1245. power->regfile_senseamp =0;
  1246. */
  1247. /*Again for alpha*/
  1248. /*for int_window*/
  1249. /*4 Read ports, 6 Write ports per int regfile*/
  1250. predeclength = profile.RUU_size * (RegCellHeight + /*2 * ruu_issue_width */10 * WordlineSpacing);
  1251. wordlinelength = profile.data_width *
  1252. (RegCellWidth +
  1253. /*4 * ruu_issue_width */20* BitlineSpacing);
  1254. bitlinelength = profile.RUU_size * (RegCellHeight +/* 2 * ruu_issue_width*/ 10* WordlineSpacing);
  1255. if(verbose)
  1256. fprintf(stderr,"res station power stats\n");
  1257. power->rs_decoder = array_decoder_power(profile.RUU_size,profile.data_width,predeclength,4,6/*2/3*profile.ruu_issue_width,profile.ruu_issue_width*/,cache);
  1258. power->rs_wordline = array_wordline_power(profile.RUU_size,profile.data_width,wordlinelength,4,6/*2/3*profile.ruu_issue_width,profile.ruu_issue_width*/,cache);
  1259. power->rs_bitline = array_bitline_power(profile.RUU_size,profile.data_width,bitlinelength,4,6/*2/3*profile.ruu_issue_width,profile.ruu_issue_width*/,cache);
  1260. /*for fp_window*/
  1261. /*4W, 4R*/
  1262. predeclength = 72 * (RegCellHeight + /*profile.ruu_issue_width*/8 * WordlineSpacing);
  1263. wordlinelength = profile.data_width *
  1264. (RegCellWidth +
  1265. /*2 * profile.ruu_issue_width*/16 * BitlineSpacing);
  1266. bitlinelength = 72 * (RegCellHeight + /*profile.ruu_issue_width*/8 * WordlineSpacing);
  1267. if(verbose)
  1268. fprintf(stderr,"res station power stats\n");
  1269. /*power->rs_decoder*/fp_reg_power += array_decoder_power(72,profile.data_width,predeclength,4,4/*2/3*profile.ruu_issue_width,2/3*profile.ruu_issue_width*/,cache);
  1270. /*power->rs_wordline*/fp_reg_power += array_wordline_power(72,profile.data_width,wordlinelength,4,4/*2/3*profile.ruu_issue_width,2/3*profile.ruu_issue_width*/,cache);
  1271. /*power->rs_bitline*/fp_reg_power += array_bitline_power(72,profile.data_width,bitlinelength,4,4 /*2/3*profile.ruu_issue_width,2/3*profile.ruu_issue_width*/,cache);
  1272. /*
  1273. predeclength = profile.RUU_size * (RegCellHeight + 3 * profile.ruu_issue_width * WordlineSpacing);
  1274. wordlinelength = profile.data_width *
  1275. (RegCellWidth +
  1276. 6 * profile.ruu_issue_width * BitlineSpacing);
  1277. bitlinelength = profile.RUU_size * (RegCellHeight + 3 * profile.ruu_issue_width * WordlineSpacing);
  1278. if(verbose)
  1279. fprintf(stderr,"res station power stats\n");
  1280. power->rs_decoder = array_decoder_power(profile.RUU_size,profile.data_width,predeclength,2*profile.ruu_issue_width,profile.ruu_issue_width,cache);
  1281. power->rs_wordline = array_wordline_power(profile.RUU_size,profile.data_width,wordlinelength,2*profile.ruu_issue_width,profile.ruu_issue_width,cache);
  1282. power->rs_bitline = array_bitline_power(profile.RUU_size,profile.data_width,bitlinelength,2*profile.ruu_issue_width,profile.ruu_issue_width,cache);
  1283. */
  1284. /* no senseamps in reg file structures (only caches) */
  1285. power->rs_senseamp =0;
  1286. /* addresses go into lsq tag's
  1287. power->lsq_wakeup_tagdrive =cam_tagdrive(profile.LSQ_size,profile.data_width,profile.res_memport,profile.res_memport);
  1288. power->lsq_wakeup_tagmatch =cam_tagmatch(profile.LSQ_size,profile.data_width,profile.res_memport,profile.res_memport,lsq_issue_width);
  1289. power->lsq_wakeup_ormatch =0;
  1290. */
  1291. wordlinelength = profile.data_width *
  1292. (RegCellWidth +
  1293. 4 * profile.res_memport * BitlineSpacing);
  1294. bitlinelength = profile.RUU_size * (RegCellHeight + 4 * profile.res_memport * WordlineSpacing);
  1295. /* rs's hold data */
  1296. if(verbose)
  1297. fprintf(stderr,"lsq station power stats\n");
  1298. power->lsq_rs_decoder = array_decoder_power(profile.LSQ_size,profile.data_width,predeclength,profile.res_memport,profile.res_memport,cache);
  1299. power->lsq_rs_wordline = array_wordline_power(profile.LSQ_size,profile.data_width,wordlinelength,profile.res_memport,profile.res_memport,cache);
  1300. power->lsq_rs_bitline = array_bitline_power(profile.LSQ_size,profile.data_width,bitlinelength,profile.res_memport,profile.res_memport,cache);
  1301. power->lsq_rs_senseamp =0;
  1302. power->resultbus = compute_resultbus_power(profile);
  1303. /*
  1304. // Load cache values into what cacti is expecting
  1305. time_parameters.cache_size = btb_config[0] * (profile.data_width/8) * btb_config[1]; // C
  1306. time_parameters.block_size = (profile.data_width/8); // B
  1307. time_parameters.associativity = btb_config[1]; // A
  1308. time_parameters.number_of_sets = btb_config[0]; // C/(B*A)
  1309. // have Cacti compute optimal cache config
  1310. calculate_time(&time_result,&time_parameters);
  1311. output_data(&time_result,&time_parameters);
  1312. // extract Cacti results
  1313. ndwl=time_result.best_Ndwl;
  1314. ndbl=time_result.best_Ndbl;
  1315. nspd=time_result.best_Nspd;
  1316. ntwl=time_result.best_Ntwl;
  1317. ntbl=time_result.best_Ntbl;
  1318. ntspd=time_result.best_Ntspd;
  1319. c = time_parameters.cache_size;
  1320. b = time_parameters.block_size;
  1321. a = time_parameters.associativity;
  1322. cache=1;
  1323. // Figure out how many rows/cols there are now
  1324. rowsb = c/(b*a*ndbl*nspd);
  1325. colsb = 8*b*a*nspd/ndwl;
  1326. if(verbose) {
  1327. fprintf(stderr,"%d KB %d-way btb (%d-byte block size):\n",c,a,b);
  1328. fprintf(stderr,"ndwl == %d, ndbl == %d, nspd == %d\n",ndwl,ndbl,nspd);
  1329. fprintf(stderr,"%d sets of %d rows x %d cols\n",ndwl*ndbl,rowsb,colsb);
  1330. }
  1331. predeclength = rowsb * (RegCellHeight + WordlineSpacing);
  1332. wordlinelength = colsb * (RegCellWidth + BitlineSpacing);
  1333. bitlinelength = rowsb * (RegCellHeight + WordlineSpacing);
  1334. if(verbose)
  1335. fprintf(stderr,"btb power stats\n");
  1336. power->btb = ndwl*ndbl*(array_decoder_power(rowsb,colsb,predeclength,1,1,cache) + array_wordline_power(rowsb,colsb,wordlinelength,1,1,cache) + array_bitline_power(rowsb,colsb,bitlinelength,1,1,cache) + senseamp_power(colsb));
  1337. cache=1;
  1338. scale_factor = squarify(twolev_config[0],twolev_config[2]);
  1339. predeclength = (twolev_config[0] / scale_factor)* (RegCellHeight + WordlineSpacing);
  1340. wordlinelength = twolev_config[2] * scale_factor * (RegCellWidth + BitlineSpacing);
  1341. bitlinelength = (twolev_config[0] / scale_factor) * (RegCellHeight + WordlineSpacing);
  1342. if(verbose)
  1343. fprintf(stderr,"local predict power stats\n");
  1344. power->local_predict = array_decoder_power(twolev_config[0]/scale_factor,twolev_config[2]*scale_factor,predeclength,1,1,cache) + array_wordline_power(twolev_config[0]/scale_factor,twolev_config[2]*scale_factor,wordlinelength,1,1,cache) + array_bitline_power(twolev_config[0]/scale_factor,twolev_config[2]*scale_factor,bitlinelength,1,1,cache) + senseamp_power(twolev_config[2]*scale_factor);
  1345. scale_factor = squarify(twolev_config[1],3);
  1346. predeclength = (twolev_config[1] / scale_factor)* (RegCellHeight + WordlineSpacing);
  1347. wordlinelength = 3 * scale_factor * (RegCellWidth + BitlineSpacing);
  1348. bitlinelength = (twolev_config[1] / scale_factor) * (RegCellHeight + WordlineSpacing);
  1349. if(verbose)
  1350. fprintf(stderr,"local predict power stats\n");
  1351. power->local_predict += array_decoder_power(twolev_config[1]/scale_factor,3*scale_factor,predeclength,1,1,cache) + array_wordline_power(twolev_config[1]/scale_factor,3*scale_factor,wordlinelength,1,1,cache) + array_bitline_power(twolev_config[1]/scale_factor,3*scale_factor,bitlinelength,1,1,cache) + senseamp_power(3*scale_factor);
  1352. if(verbose)
  1353. fprintf(stderr,"bimod_config[0] == %d\n",bimod_config[0]);
  1354. scale_factor = squarify(bimod_config[0],2);
  1355. predeclength = bimod_config[0]/scale_factor * (RegCellHeight + WordlineSpacing);
  1356. wordlinelength = 2*scale_factor * (RegCellWidth + BitlineSpacing);
  1357. bitlinelength = bimod_config[0]/scale_factor * (RegCellHeight + WordlineSpacing);
  1358. if(verbose)
  1359. fprintf(stderr,"global predict power stats\n");
  1360. power->global_predict = array_decoder_power(bimod_config[0]/scale_factor,2*scale_factor,predeclength,1,1,cache) + array_wordline_power(bimod_config[0]/scale_factor,2*scale_factor,wordlinelength,1,1,cache) + array_bitline_power(bimod_config[0]/scale_factor,2*scale_factor,bitlinelength,1,1,cache) + senseamp_power(2*scale_factor);
  1361. scale_factor = squarify(comb_config[0],2);
  1362. predeclength = comb_config[0]/scale_factor * (RegCellHeight + WordlineSpacing);
  1363. wordlinelength = 2*scale_factor * (RegCellWidth + BitlineSpacing);
  1364. bitlinelength = comb_config[0]/scale_factor * (RegCellHeight + WordlineSpacing);
  1365. if(verbose)
  1366. fprintf(stderr,"chooser predict power stats\n");
  1367. power->chooser = array_decoder_power(comb_config[0]/scale_factor,2*scale_factor,predeclength,1,1,cache) + array_wordline_power(comb_config[0]/scale_factor,2*scale_factor,wordlinelength,1,1,cache) + array_bitline_power(comb_config[0]/scale_factor,2*scale_factor,bitlinelength,1,1,cache) + senseamp_power(2*scale_factor);
  1368. */
  1369. if(verbose)
  1370. fprintf(stderr,"RAS predict power stats\n");
  1371. power->ras = simple_array_power(ras_size,profile.data_width,1,1,0);
  1372. /*
  1373. tagsize = va_size - ((int)logtwo(cache_dl1->nsets) + (int)logtwo(cache_dl1->bsize));
  1374. if(verbose)
  1375. fprintf(stderr,"dtlb predict power stats\n");
  1376. power->dtlb = profile.res_memport*(cam_array(dtlb->nsets, va_size - (int)logtwo((double)dtlb->bsize),1,1,1) + simple_array_power(dtlb->nsets,tagsize,1,1,cache));
  1377. tagsize = va_size - ((int)logtwo(cache_il1->nsets) + (int)logtwo(cache_il1->bsize));
  1378. */
  1379. /*
  1380. predeclength = itlb->nsets * (RegCellHeight + WordlineSpacing);
  1381. wordlinelength = logtwo((double)itlb->bsize) * (RegCellWidth + BitlineSpacing);
  1382. bitlinelength = itlb->nsets * (RegCellHeight + WordlineSpacing);
  1383. if(verbose)
  1384. fprintf(stderr,"itlb predict power stats\n");
  1385. power->itlb = cam_array(itlb->nsets, va_size - (int)logtwo((double)itlb->bsize),1,1,1) + simple_array_power(itlb->nsets,tagsize,1,1,cache);
  1386. cache=1;
  1387. time_parameters.cache_size = cache_il1->nsets * cache_il1->bsize * cache_il1->assoc; // C
  1388. time_parameters.block_size = cache_il1->bsize; // B
  1389. time_parameters.associativity = cache_il1->assoc; // A
  1390. time_parameters.number_of_sets = cache_il1->nsets; // C/(B*A)
  1391. calculate_time(&time_result,&time_parameters);
  1392. output_data(&time_result,&time_parameters);
  1393. ndwl=time_result.best_Ndwl;
  1394. ndbl=time_result.best_Ndbl;
  1395. nspd=time_result.best_Nspd;
  1396. ntwl=time_result.best_Ntwl;
  1397. ntbl=time_result.best_Ntbl;
  1398. ntspd=time_result.best_Ntspd;
  1399. c = time_parameters.cache_size;
  1400. b = time_parameters.block_size;
  1401. a = time_parameters.associativity;
  1402. rowsb = c/(b*a*ndbl*nspd);
  1403. colsb = 8*b*a*nspd/ndwl;
  1404. tagsize = va_size - ((int)logtwo(cache_il1->nsets) + (int)logtwo(cache_il1->bsize));
  1405. trowsb = c/(b*a*ntbl*ntspd);
  1406. tcolsb = a * (tagsize + 1 + 6) * ntspd/ntwl;
  1407. if(verbose) {
  1408. fprintf(stderr,"%d KB %d-way cache (%d-byte block size):\n",c,a,b);
  1409. fprintf(stderr,"ndwl == %d, ndbl == %d, nspd == %d\n",ndwl,ndbl,nspd);
  1410. fprintf(stderr,"%d sets of %d rows x %d cols\n",ndwl*ndbl,rowsb,colsb);
  1411. fprintf(stderr,"tagsize == %d\n",tagsize);
  1412. }
  1413. predeclength = rowsb * (RegCellHeight + WordlineSpacing);
  1414. wordlinelength = colsb * (RegCellWidth + BitlineSpacing);
  1415. bitlinelength = rowsb * (RegCellHeight + WordlineSpacing);
  1416. if(verbose)
  1417. fprintf(stderr,"icache power stats\n");
  1418. power->icache_decoder = ndwl*ndbl*array_decoder_power(rowsb,colsb,predeclength,1,1,cache);
  1419. power->icache_wordline = ndwl*ndbl*array_wordline_power(rowsb,colsb,wordlinelength,1,1,cache);
  1420. power->icache_bitline = ndwl*ndbl*array_bitline_power(rowsb,colsb,bitlinelength,1,1,cache);
  1421. power->icache_senseamp = ndwl*ndbl*senseamp_power(colsb);
  1422. power->icache_tagarray = ntwl*ntbl*(simple_array_power(trowsb,tcolsb,1,1,cache));
  1423. power->icache_power = power->icache_decoder + power->icache_wordline + power->icache_bitline + power->icache_senseamp + power->icache_tagarray;
  1424. time_parameters.cache_size = cache_dl1->nsets * cache_dl1->bsize * cache_dl1->assoc; // C
  1425. time_parameters.block_size = cache_dl1->bsize; // B
  1426. time_parameters.associativity = cache_dl1->assoc; // A
  1427. time_parameters.number_of_sets = cache_dl1->nsets; // C/(B*A)
  1428. calculate_time(&time_result,&time_parameters);
  1429. output_data(&time_result,&time_parameters);
  1430. ndwl=time_result.best_Ndwl;
  1431. ndbl=time_result.best_Ndbl;
  1432. nspd=time_result.best_Nspd;
  1433. ntwl=time_result.best_Ntwl;
  1434. ntbl=time_result.best_Ntbl;
  1435. ntspd=time_result.best_Ntspd;
  1436. c = time_parameters.cache_size;
  1437. b = time_parameters.block_size;
  1438. a = time_parameters.associativity;
  1439. cache=1;
  1440. rowsb = c/(b*a*ndbl*nspd);
  1441. colsb = 8*b*a*nspd/ndwl;
  1442. tagsize = va_size - ((int)logtwo(cache_dl1->nsets) + (int)logtwo(cache_dl1->bsize));
  1443. trowsb = c/(b*a*ntbl*ntspd);
  1444. tcolsb = a * (tagsize + 1 + 6) * ntspd/ntwl;
  1445. if(verbose) {
  1446. fprintf(stderr,"%d KB %d-way cache (%d-byte block size):\n",c,a,b);
  1447. fprintf(stderr,"ndwl == %d, ndbl == %d, nspd == %d\n",ndwl,ndbl,nspd);
  1448. fprintf(stderr,"%d sets of %d rows x %d cols\n",ndwl*ndbl,rowsb,colsb);
  1449. fprintf(stderr,"tagsize == %d\n",tagsize);
  1450. fprintf(stderr,"\nntwl == %d, ntbl == %d, ntspd == %d\n",ntwl,ntbl,ntspd);
  1451. fprintf(stderr,"%d sets of %d rows x %d cols\n",ntwl*ntbl,trowsb,tcolsb);
  1452. }
  1453. predeclength = rowsb * (RegCellHeight + WordlineSpacing);
  1454. wordlinelength = colsb * (RegCellWidth + BitlineSpacing);
  1455. bitlinelength = rowsb * (RegCellHeight + WordlineSpacing);
  1456. if(verbose)
  1457. fprintf(stderr,"dcache power stats\n");
  1458. power->dcache_decoder = profile.res_memport*ndwl*ndbl*array_decoder_power(rowsb,colsb,predeclength,1,1,cache);
  1459. power->dcache_wordline = profile.res_memport*ndwl*ndbl*array_wordline_power(rowsb,colsb,wordlinelength,1,1,cache);
  1460. power->dcache_bitline = profile.res_memport*ndwl*ndbl*array_bitline_power(rowsb,colsb,bitlinelength,1,1,cache);
  1461. power->dcache_senseamp = profile.res_memport*ndwl*ndbl*senseamp_power(colsb);
  1462. power->dcache_tagarray = profile.res_memport*ntwl*ntbl*(simple_array_power(trowsb,tcolsb,1,1,cache));
  1463. power->dcache_power = power->dcache_decoder + power->dcache_wordline + power->dcache_bitline + power->dcache_senseamp + power->dcache_tagarray;
  1464. */
  1465. //clockpower = total_clockpower(.018);
  1466. clockpower = total_clockpower(.007, profile);
  1467. power->clock_power = clockpower;
  1468. /*
  1469. if(verbose) {
  1470. fprintf(stderr,"result bus power == %f\n",power->resultbus);
  1471. fprintf(stderr,"global clock power == %f\n",clockpower);
  1472. }
  1473. time_parameters.cache_size = cache_dl2->nsets * cache_dl2->bsize * cache_dl2->assoc; // C
  1474. time_parameters.block_size = cache_dl2->bsize; // B
  1475. time_parameters.associativity = cache_dl2->assoc; // A
  1476. time_parameters.number_of_sets = cache_dl2->nsets; // C/(B*A)
  1477. calculate_time(&time_result,&time_parameters);
  1478. output_data(&time_result,&time_parameters);
  1479. ndwl=time_result.best_Ndwl;
  1480. ndbl=time_result.best_Ndbl;
  1481. nspd=time_result.best_Nspd;
  1482. ntwl=time_result.best_Ntwl;
  1483. ntbl=time_result.best_Ntbl;
  1484. ntspd=time_result.best_Ntspd;
  1485. c = time_parameters.cache_size;
  1486. b = time_parameters.block_size;
  1487. a = time_parameters.associativity;
  1488. rowsb = c/(b*a*ndbl*nspd);
  1489. colsb = 8*b*a*nspd/ndwl;
  1490. tagsize = va_size - ((int)logtwo(cache_dl2->nsets) + (int)logtwo(cache_dl2->bsize));
  1491. trowsb = c/(b*a*ntbl*ntspd);
  1492. tcolsb = a * (tagsize + 1 + 6) * ntspd/ntwl;
  1493. if(verbose) {
  1494. fprintf(stderr,"%d KB %d-way cache (%d-byte block size):\n",c,a,b);
  1495. fprintf(stderr,"ndwl == %d, ndbl == %d, nspd == %d\n",ndwl,ndbl,nspd);
  1496. fprintf(stderr,"%d sets of %d rows x %d cols\n",ndwl*ndbl,rowsb,colsb);
  1497. fprintf(stderr,"tagsize == %d\n",tagsize);
  1498. }
  1499. predeclength = rowsb * (RegCellHeight + WordlineSpacing);
  1500. wordlinelength = colsb * (RegCellWidth + BitlineSpacing);
  1501. bitlinelength = rowsb * (RegCellHeight + WordlineSpacing);
  1502. if(verbose)
  1503. fprintf(stderr,"dcache2 power stats\n");
  1504. power->dcache2_decoder = array_decoder_power(rowsb,colsb,predeclength,1,1,cache);
  1505. power->dcache2_wordline = array_wordline_power(rowsb,colsb,wordlinelength,1,1,cache);
  1506. power->dcache2_bitline = array_bitline_power(rowsb,colsb,bitlinelength,1,1,cache);
  1507. power->dcache2_senseamp = senseamp_power(colsb);
  1508. power->dcache2_tagarray = simple_array_power(trowsb,tcolsb,1,1,cache);
  1509. power->dcache2_power = power->dcache2_decoder + power->dcache2_wordline + power->dcache2_bitline + power->dcache2_senseamp + power->dcache2_tagarray;
  1510. power->rat_decoder *= crossover_scaling;
  1511. power->rat_wordline *= crossover_scaling;
  1512. power->rat_bitline *= crossover_scaling;
  1513. power->dcl_compare *= crossover_scaling;
  1514. power->dcl_pencode *= crossover_scaling;
  1515. power->inst_decoder_power *= crossover_scaling;
  1516. */
  1517. /* power->wakeup_tagdrive *= crossover_scaling;
  1518. power->wakeup_tagmatch *= crossover_scaling;
  1519. power->wakeup_ormatch *= crossover_scaling; */
  1520. /* lym
  1521. power->iq_wakeup_tagdrive *= crossover_scaling;
  1522. power->iq_wakeup_tagmatch *= crossover_scaling;
  1523. power->iq_wakeup_ormatch *= crossover_scaling;
  1524. power->fpq_wakeup_tagdrive *= crossover_scaling;
  1525. power->fpq_wakeup_tagmatch *= crossover_scaling;
  1526. power->fpq_wakeup_ormatch *= crossover_scaling;
  1527. lym */
  1528. /* power->selection *= crossover_scaling; */
  1529. /* lym
  1530. power->iq_selection *= crossover_scaling;
  1531. power->fpq_selection *= crossover_scaling;
  1532. power->lsq_selection *= crossover_scaling;
  1533. lym */
  1534. power->regfile_decoder *= crossover_scaling;
  1535. power->regfile_wordline *= crossover_scaling;
  1536. power->regfile_bitline *= crossover_scaling;
  1537. power->regfile_senseamp *= crossover_scaling;
  1538. power->rs_decoder *= crossover_scaling;
  1539. power->rs_wordline *= crossover_scaling;
  1540. power->rs_bitline *= crossover_scaling;
  1541. power->rs_senseamp *= crossover_scaling;
  1542. power->lsq_wakeup_tagdrive *= crossover_scaling;
  1543. power->lsq_wakeup_tagmatch *= crossover_scaling;
  1544. power->lsq_rs_decoder *= crossover_scaling;
  1545. power->lsq_rs_wordline *= crossover_scaling;
  1546. power->lsq_rs_bitline *= crossover_scaling;
  1547. power->lsq_rs_senseamp *= crossover_scaling;
  1548. power->resultbus *= crossover_scaling;
  1549. power->btb *= crossover_scaling;
  1550. power->local_predict *= crossover_scaling;
  1551. power->global_predict *= crossover_scaling;
  1552. power->chooser *= crossover_scaling;
  1553. /*
  1554. power->dtlb *= crossover_scaling;
  1555. power->itlb *= crossover_scaling;
  1556. power->icache_decoder *= crossover_scaling;
  1557. power->icache_wordline*= crossover_scaling;
  1558. power->icache_bitline *= crossover_scaling;
  1559. power->icache_senseamp*= crossover_scaling;
  1560. power->icache_tagarray*= crossover_scaling;
  1561. power->icache_power *= crossover_scaling;
  1562. power->dcache_decoder *= crossover_scaling;
  1563. power->dcache_wordline *= crossover_scaling;
  1564. power->dcache_bitline *= crossover_scaling;
  1565. power->dcache_senseamp *= crossover_scaling;
  1566. power->dcache_tagarray *= crossover_scaling;
  1567. power->dcache_power *= crossover_scaling;
  1568. power->clock_power *= crossover_scaling;
  1569. power->dcache2_decoder *= crossover_scaling;
  1570. power->dcache2_wordline *= crossover_scaling;
  1571. power->dcache2_bitline *= crossover_scaling;
  1572. power->dcache2_senseamp *= crossover_scaling;
  1573. power->dcache2_tagarray *= crossover_scaling;
  1574. power->dcache2_power *= crossover_scaling;
  1575. */
  1576. power->total_power = power->local_predict + power->global_predict +
  1577. power->chooser + power->btb +
  1578. power->rat_decoder + power->rat_wordline +
  1579. power->rat_bitline + power->rat_senseamp +
  1580. power->dcl_compare + power->dcl_pencode +
  1581. power->inst_decoder_power +
  1582. /* power->wakeup_tagdrive + power->wakeup_tagmatch + */ /* lym */
  1583. power->iq_wakeup_tagdrive + power->iq_wakeup_tagmatch +
  1584. power->fpq_wakeup_tagdrive + power->fpq_wakeup_tagmatch +
  1585. /* power->selection *//* lym */ power->iq_selection + power->fpq_selection
  1586. + power->lsq_selection +
  1587. power->regfile_decoder + power->regfile_wordline +
  1588. power->regfile_bitline + power->regfile_senseamp +
  1589. power->rs_decoder + power->rs_wordline +
  1590. power->rs_bitline + power->rs_senseamp +
  1591. power->lsq_wakeup_tagdrive + power->lsq_wakeup_tagmatch +
  1592. power->lsq_rs_decoder + power->lsq_rs_wordline +
  1593. power->lsq_rs_bitline + power->lsq_rs_senseamp +
  1594. power->resultbus +
  1595. power->clock_power +
  1596. power->icache_power +
  1597. power->itlb +
  1598. power->dcache_power +
  1599. power->dtlb +
  1600. power->dcache2_power;
  1601. power->total_power_nodcache2 =power->local_predict + power->global_predict +
  1602. power->chooser + power->btb +
  1603. power->rat_decoder + power->rat_wordline +
  1604. power->rat_bitline + power->rat_senseamp +
  1605. power->dcl_compare + power->dcl_pencode +
  1606. power->inst_decoder_power +
  1607. /* power->wakeup_tagdrive + power->wakeup_tagmatch + */ /* lym */
  1608. power->iq_wakeup_tagdrive + power->iq_wakeup_tagmatch +
  1609. power->fpq_wakeup_tagdrive + power->fpq_wakeup_tagmatch +
  1610. /* power->selection */ /* lym */ power->iq_selection +
  1611. power->fpq_selection +
  1612. power->lsq_selection +
  1613. power->regfile_decoder + power->regfile_wordline +
  1614. power->regfile_bitline + power->regfile_senseamp +
  1615. power->rs_decoder + power->rs_wordline +
  1616. power->rs_bitline + power->rs_senseamp +
  1617. power->lsq_wakeup_tagdrive + power->lsq_wakeup_tagmatch +
  1618. power->lsq_rs_decoder + power->lsq_rs_wordline +
  1619. power->lsq_rs_bitline + power->lsq_rs_senseamp +
  1620. power->resultbus +
  1621. power->clock_power +
  1622. power->icache_power +
  1623. power->itlb +
  1624. power->dcache_power +
  1625. power->dtlb +
  1626. power->dcache2_power;
  1627. power->bpred_power = power->btb + power->local_predict + power->global_predict + power->chooser + power->ras;
  1628. power->rat_power = power->rat_decoder +
  1629. power->rat_wordline + power->rat_bitline + power->rat_senseamp;
  1630. power->dcl_power = power->dcl_compare + power->dcl_pencode;
  1631. power->rename_power = power->rat_power +
  1632. power->dcl_power +
  1633. power->inst_decoder_power;
  1634. /* power->wakeup_power = power->wakeup_tagdrive + power->wakeup_tagmatch +
  1635. power->wakeup_ormatch; */ /* lym */
  1636. power->iq_wakeup_power = power->iq_wakeup_tagdrive + power->iq_wakeup_tagmatch + power->iq_wakeup_ormatch;
  1637. power->fpq_wakeup_power = power->fpq_wakeup_tagdrive + power->fpq_wakeup_tagmatch + power->fpq_wakeup_ormatch;
  1638. power->rs_power = power->rs_decoder +
  1639. power->rs_wordline + power->rs_bitline + power->rs_senseamp;
  1640. power->rs_power_nobit = power->rs_decoder +
  1641. power->rs_wordline + power->rs_senseamp;
  1642. /* lym */
  1643. power->window_power = power->iq_wakeup_power + power->fpq_wakeup_power + power->rs_power +
  1644. /* power->selection */ /* lym */ power->iq_selection + power->fpq_selection + power->lsq_selection;
  1645. /* lym */
  1646. power->lsq_rs_power = power->lsq_rs_decoder +
  1647. power->lsq_rs_wordline + power->lsq_rs_bitline +
  1648. power->lsq_rs_senseamp;
  1649. power->lsq_rs_power_nobit = power->lsq_rs_decoder +
  1650. power->lsq_rs_wordline + power->lsq_rs_senseamp;
  1651. power->lsq_wakeup_power = power->lsq_wakeup_tagdrive + power->lsq_wakeup_tagmatch;
  1652. power->lsq_power = power->lsq_wakeup_power + power->lsq_rs_power;
  1653. power->regfile_power = power->regfile_decoder +
  1654. power->regfile_wordline + power->regfile_bitline +
  1655. power->regfile_senseamp;
  1656. power->regfile_power_nobit = power->regfile_decoder +
  1657. power->regfile_wordline + power->regfile_senseamp;
  1658. /* MCREG Power modeling */
  1659. ireg2_power_per_reg = compute_ireg2_access_power(4.91) * 32; /* 32 bit data */
  1660. ireg2_address_power_per_reg = compute_ireg2_access_power(4.91) * 7; /* 7 bit address: 80 regs */
  1661. //=============================================================/
  1662. /* scale powers from mJ to J */
  1663. power->regfile_power *= 1e-3;
  1664. power->ialu_power *= 1e-3;
  1665. power->falu_power *= 1e-3;
  1666. power->resultbus *= 1e-3;
  1667. power->clock_power *= 1e-3;
  1668. power->regfile_power_nobit *= 1e-3;
  1669. power->regfile_bitline *= 1e-3;
  1670. double power_scale = frequency_scaling * voltage_scaling * voltage_scaling;
  1671. power->power_scale = power_scale;
  1672. #ifdef STATIC_AF
  1673. double regfile_power_leakage = power_scale *turnoff_factor*power->regfile_power;
  1674. #else
  1675. double regfile_power_leakage = power_scale *turnoff_factor*power->regfile_power;
  1676. #endif
  1677. #ifdef STATIC_AF
  1678. double resultbus_power_leakage = power_scale * turnoff_factor*power->resultbus;
  1679. #else
  1680. double resultbus_power_leakage = power_scale * turnoff_factor*power->resultbus;
  1681. #endif
  1682. power->regfile_leakage = regfile_power_leakage;
  1683. power->ialu_leakage = power_scale *turnoff_factor*power->ialu_power;
  1684. power->falu_leakage = power_scale *turnoff_factor*power->falu_power;
  1685. power->resultbus_leakage = resultbus_power_leakage;
  1686. power->decode_leakage = power_scale*turnoff_factor*power->inst_decoder_power;
  1687. double total_cycle_leakage_cc3=power->regfile_leakage
  1688. +power->ialu_leakage
  1689. +power->falu_leakage
  1690. +power->resultbus_leakage
  1691. +power->decode_leakage;
  1692. double max_cycle_power = power_scale*power->regfile_power \
  1693. + power_scale*power->ialu_power \
  1694. + power_scale*power->falu_power \
  1695. + power_scale*power->resultbus\
  1696. + power_scale*power->inst_decoder_power;
  1697. power->clock_leakage = power_scale * power->clock_power*(total_cycle_leakage_cc3/max_cycle_power);
  1698. power->total_leakage = power->regfile_leakage
  1699. +power->ialu_leakage
  1700. +power->falu_leakage
  1701. +power->resultbus_leakage
  1702. +power->clock_leakage
  1703. +power->decode_leakage;
  1704. //dump_power_stats(power);
  1705. }
  1706. void clear_dtm_stats();
/* FIXME: many stats (e.g. thermal emergencies, triggers) do not seem to be reset. */
  1708. void clear_lots_of_stats()
  1709. {
  1710. /*
  1711. int i = 0;
  1712. FPQ_fcount = 0;
  1713. FPQ_count = 0;
  1714. IQ_fcount = 0;
  1715. IQ_count = 0;
  1716. LSQ_fcount = 0;
  1717. LSQ_count = 0;
  1718. RUU_fcount = 0;
  1719. RUU_count = 0;
  1720. IFQ_fcount = 0;
  1721. IFQ_count = 0;
  1722. shadow_sim_cycle = sim_cycle;
  1723. shadow_sim_num_insn = sim_num_insn;
  1724. wall_clock_time = 0;
  1725. sim_total_insn = 0;
  1726. sim_num_branches = 0;
  1727. sim_total_loads = 0;
  1728. sim_total_refs = 0;
  1729. sim_num_branches = 0;
  1730. sim_num_loads = 0;
  1731. sim_num_refs = 0;
  1732. for(i = 0; i < flp_adj->n_units; i++)
  1733. overall_power[i] = 0;
  1734. */
  1735. total_rename_access=0;
  1736. total_bpred_access=0;
  1737. total_window_access=0;
  1738. total_lsq_access=0;
  1739. total_iq_access=0;
  1740. total_fpq_access=0;
  1741. total_regfile_access=0;
  1742. total_icache_access=0;
  1743. total_dcache_access=0;
  1744. total_dcache2_access=0;
  1745. total_alu_access=0;
  1746. total_resultbus_access=0;
  1747. max_rename_access = 0;
  1748. max_bpred_access = 0;
  1749. max_window_access = 0;
  1750. max_lsq_access = 0;
  1751. max_regfile_access = 0;
  1752. max_icache_access = 0;
  1753. max_dcache_access = 0;
  1754. max_dcache2_access = 0;
  1755. max_alu_access = 0;
  1756. max_resultbus_access = 0;
  1757. zc_cycles = 0;
  1758. //clear_dtm_stats();
  1759. /*
  1760. // clear predictor stats
  1761. if (pred)
  1762. bpred_clear_stats(pred);
  1763. // clear cache stats
  1764. if (cache_il1 && (cache_il1 != cache_dl1 && cache_il1 != cache_dl2))
  1765. cache_clear_stats(cache_il1);
  1766. if (cache_il2 && (cache_il2 != cache_dl1 && cache_il2 != cache_dl2))
  1767. cache_clear_stats(cache_il2);
  1768. if (cache_dl1)
  1769. cache_clear_stats(cache_dl1);
  1770. if (cache_dl2)
  1771. cache_clear_stats(cache_dl2);
  1772. if (itlb)
  1773. cache_clear_stats(itlb);
  1774. if (dtlb)
  1775. cache_clear_stats(dtlb);
  1776. */
  1777. }