
/ptlsim/core/ooo-core/ooo.cpp

https://github.com/stefanneumann/marss
C++ | 1461 lines | 1042 code | 211 blank | 208 comment
Possible License(s): LGPL-2.1, GPL-2.0
  1. //
  2. // PTLsim: Cycle Accurate x86-64 Simulator
  3. // Out-of-Order Core Simulator
  4. // Core Structures
  5. //
  6. // Copyright 2003-2008 Matt T. Yourst <yourst@yourst.com>
  7. // Copyright 2006-2008 Hui Zeng <hzeng@cs.binghamton.edu>
  8. //
  9. // Modifications for MARSSx86
  10. // Copyright 2009-2010 Avadh Patel <avadh4all@gmail.com>
  11. #include <globals.h>
  12. #include <elf.h>
  13. #include <ptlsim.h>
  14. #include <branchpred.h>
  15. #include <logic.h>
  16. #include <statelist.h>
  17. #include <superstl.h>
  18. #define DECLARE_STRUCTURES
  19. #include <ooo.h>
  20. #include <memoryHierarchy.h>
  21. #define MYDEBUG if(logable(99)) ptl_logfile
  22. #ifndef ENABLE_CHECKS
  23. #undef assert
  24. #define assert(x) (x)
  25. #endif
  26. #ifndef ENABLE_LOGGING
  27. #undef logable
  28. #define logable(level) (0)
  29. #endif
  30. using namespace OOO_CORE_MODEL;
  31. using namespace superstl;
  32. namespace OOO_CORE_MODEL {
  33. byte uop_executable_on_cluster[OP_MAX_OPCODE];
  34. W32 forward_at_cycle_lut[MAX_CLUSTERS][MAX_FORWARDING_LATENCY+1];
  35. bool globals_initialized = false;
  36. };
  37. //
  38. // Initialize lookup tables used by the simulation
  39. //
  40. static void init_luts() {
  41. if(globals_initialized)
  42. return;
  43. // Initialize opcode maps
  44. foreach (i, OP_MAX_OPCODE) {
  45. W32 allowedfu = fuinfo[i].fu;
  46. W32 allowedcl = 0;
  47. foreach (cl, MAX_CLUSTERS) {
  48. if (clusters[cl].fu_mask & allowedfu) setbit(allowedcl, cl);
  49. }
  50. uop_executable_on_cluster[i] = allowedcl;
  51. }
  52. // Initialize forward-at-cycle LUTs
  53. foreach (srcc, MAX_CLUSTERS) {
  54. foreach (destc, MAX_CLUSTERS) {
  55. foreach (lat, MAX_FORWARDING_LATENCY+1) {
  56. if (lat == intercluster_latency_map[srcc][destc]) {
  57. setbit(forward_at_cycle_lut[srcc][lat], destc);
  58. }
  59. }
  60. }
  61. }
  62. globals_initialized = true;
  63. }
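//
// Editor's note (sketch added for clarity, not part of the original source):
// the LUT built above answers "which destination clusters receive a result
// forwarded from cluster srcc exactly `lat` cycles after issue?". A consumer
// in the transfer/forwarding stage would test the per-destination bit roughly
// like this (the helper name forwards_now() is hypothetical; bit() is assumed
// to be the usual PTLsim bit-test helper):
//
//   static inline bool forwards_now(int srcc, int destc, int cycles_since_issue) {
//       // true only on the cycle matching intercluster_latency_map[srcc][destc]
//       return bit(forward_at_cycle_lut[srcc][cycles_since_issue], destc);
//   }
//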
  64. ThreadContext::ThreadContext(OooCore& core_, W8 threadid_, Context& ctx_)
  65. : core(core_), threadid(threadid_), ctx(ctx_)
  66. , thread_stats("thread", &core_)
  67. {
  68. stringbuf stats_name;
  69. stats_name << "thread" << threadid;
  70. thread_stats.update_name(stats_name.buf);
  71. // Set decoder stats
  72. set_decoder_stats(&thread_stats, ctx.cpu_index);
  73. // Connect stats equations
  74. thread_stats.issue.uipc.add_elem(&thread_stats.issue.uops);
  75. thread_stats.issue.uipc.add_elem(&core_.core_stats.cycles);
  76. thread_stats.commit.uipc.add_elem(&thread_stats.commit.uops);
  77. thread_stats.commit.uipc.add_elem(&core_.core_stats.cycles);
  78. thread_stats.commit.ipc.add_elem(&thread_stats.commit.insns);
  79. thread_stats.commit.ipc.add_elem(&core_.core_stats.cycles);
  80. // thread_stats.commit.ipc.enable_periodic_dump();
  81. thread_stats.set_default_stats(user_stats);
  82. reset();
  83. }
  84. void ThreadContext::reset() {
  85. setzero(specrrt);
  86. setzero(commitrrt);
  87. setzero(fetchrip);
  88. current_basic_block = NULL;
  89. current_basic_block_transop_index = -1;
  90. stall_frontend = false;
  91. waiting_for_icache_fill = false;
  92. waiting_for_icache_fill_physaddr = 0;
  93. fetch_uuid = 0;
  94. current_icache_block = 0;
  95. loads_in_flight = 0;
  96. stores_in_flight = 0;
  97. prev_interrupts_pending = false;
  98. handle_interrupt_at_next_eom = false;
  99. stop_at_next_eom = false;
  100. last_commit_at_cycle = 0;
  101. smc_invalidate_pending = 0;
  102. setzero(smc_invalidate_rvp);
  103. chk_recovery_rip = 0;
  104. unaligned_ldst_buf.reset();
  105. consecutive_commits_inside_spinlock = 0;
  106. pause_counter = 0;
  107. total_uops_committed = 0;
  108. total_insns_committed = 0;
  109. dispatch_deadlock_countdown = 0;
  110. #ifdef MULTI_IQ
  111. foreach(i, 4){
  112. issueq_count[i] = 0;
  113. }
  114. #else
  115. issueq_count = 0;
  116. #endif
  117. queued_mem_lock_release_count = 0;
  118. branchpred.init(coreid, threadid);
  119. in_tlb_walk = 0;
  120. }
  121. void ThreadContext::setupTLB() {
  122. foreach(i, CPU_TLB_SIZE) {
  123. W64 dtlb_addr = ctx.tlb_table[!ctx.kernel_mode][i].addr_read;
  124. W64 itlb_addr = ctx.tlb_table[!ctx.kernel_mode][i].addr_code;
  125. if((dtlb_addr ) != -1) {
  126. dtlb.insert(dtlb_addr);
  127. }
  128. if((itlb_addr) != -1) {
  129. itlb.insert(itlb_addr);
  130. }
  131. }
  132. }
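//
// Editor's note (added for clarity): setupTLB() pre-warms the simulated
// ITLB/DTLB from QEMU's software TLB (ctx.tlb_table) so a newly attached
// thread does not begin with a burst of cold TLB misses; slots whose
// addr_read/addr_code fields are -1 are unused and are simply skipped.
//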
  133. void ThreadContext::init() {
  134. rob_states.reset();
  135. //
  136. // ROB states
  137. //
  138. rob_free_list("free", rob_states, 0);
  139. rob_frontend_list("frontend", rob_states, ROB_STATE_PRE_READY_TO_DISPATCH);
  140. rob_ready_to_dispatch_list("ready-to-dispatch", rob_states, 0);
  141. InitClusteredROBList(rob_dispatched_list, "dispatched", ROB_STATE_IN_ISSUE_QUEUE);
  142. InitClusteredROBList(rob_ready_to_issue_list, "ready-to-issue", ROB_STATE_IN_ISSUE_QUEUE);
  143. InitClusteredROBList(rob_ready_to_store_list, "ready-to-store", ROB_STATE_IN_ISSUE_QUEUE);
  144. InitClusteredROBList(rob_ready_to_load_list, "ready-to-load", ROB_STATE_IN_ISSUE_QUEUE);
  145. InitClusteredROBList(rob_issued_list, "issued", 0);
  146. InitClusteredROBList(rob_completed_list, "completed", ROB_STATE_READY);
  147. InitClusteredROBList(rob_ready_to_writeback_list, "ready-to-write", ROB_STATE_READY);
  148. rob_cache_miss_list("cache-miss", rob_states, 0);
  149. rob_tlb_miss_list("tlb-miss", rob_states, 0);
  150. rob_memory_fence_list("memory-fence", rob_states, 0);
  151. rob_ready_to_commit_queue("ready-to-commit", rob_states, ROB_STATE_READY);
  152. // Setup TLB of each thread
  153. setupTLB();
  154. reset();
  155. coreid = core.coreid;
  156. }
  157. OooCore::OooCore(BaseMachine& machine_, W8 num_threads,
  158. const char* name)
  159. : BaseCore(machine_, name)
  160. , core_stats("core", this)
  161. {
  162. coreid = machine.get_next_coreid();
  163. if(!machine_.get_option(name, "threads", threadcount)) {
  164. threadcount = 1;
  165. }
  166. setzero(threads);
  167. assert(num_threads > 0 && "Core has at least 1 thread");
  168. // Rename the stats
  169. stringbuf core_name;
  170. if(name) {
  171. core_name << name << "_" << coreid;
  172. } else {
  173. core_name << "core_" << coreid;
  174. }
  175. update_name(core_name.buf);
  176. // Setup Cache Signals
  177. stringbuf sig_name;
  178. sig_name << core_name << "-dcache-wakeup";
  179. dcache_signal.set_name(sig_name.buf);
  180. dcache_signal.connect(signal_mem_ptr(*this,
  181. &OooCore::dcache_wakeup));
  182. sig_name.reset();
  183. sig_name << core_name << "-icache-wakeup";
  184. icache_signal.set_name(sig_name.buf);
  185. icache_signal.connect(signal_mem_ptr(*this,
  186. &OooCore::icache_wakeup));
  187. threads = (ThreadContext**)malloc(sizeof(ThreadContext*) * threadcount);
  188. // Setup Threads
  189. foreach(i, threadcount) {
  190. Context& ctx = machine.get_next_context();//coreid + i);
  191. ThreadContext* thread = new ThreadContext(*this, i, ctx);
  192. threads[i] = thread;
  193. thread->init();
  194. }
  195. init();
  196. init_luts();
  197. }
  198. void OooCore::reset() {
  199. round_robin_tid = 0;
  200. round_robin_reg_file_offset = 0;
  201. setzero(robs_on_fu);
  202. foreach_issueq(reset(coreid, this));
  203. #ifndef MULTI_IQ
  204. int reserved_iq_entries_per_thread = (int)sqrt(
  205. ISSUE_QUEUE_SIZE / threadcount);
  206. reserved_iq_entries = reserved_iq_entries_per_thread * \
  207. threadcount;
  208. assert(reserved_iq_entries && reserved_iq_entries < \
  209. ISSUE_QUEUE_SIZE);
  210. foreach_issueq(set_reserved_entries(reserved_iq_entries));
  211. #else
  212. int reserved_iq_entries_per_thread = (int)sqrt(
  213. ISSUE_QUEUE_SIZE / threadcount);
  214. for_each_cluster(cluster){
  215. reserved_iq_entries[cluster] = reserved_iq_entries_per_thread * \
  216. threadcount;
  217. assert(reserved_iq_entries[cluster] && reserved_iq_entries[cluster] < \
  218. ISSUE_QUEUE_SIZE);
  219. }
  220. foreach_issueq(set_reserved_entries(
  221. reserved_iq_entries_per_thread * threadcount));
  222. #endif
  223. foreach_issueq(reset_shared_entries());
  224. unaligned_predictor.reset();
  225. foreach (i, threadcount) threads[i]->reset();
  226. }
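//
// Editor's note (worked example, assuming hypothetical sizes): with
// ISSUE_QUEUE_SIZE = 64 and threadcount = 4, the reservation math above gives
//
//   reserved_iq_entries_per_thread = (int)sqrt(64 / 4) = 4
//   reserved_iq_entries            = 4 * 4             = 16
//
// so 16 of the 64 entries are reserved (4 per thread) and the remaining 48
// stay in the shared pool: no thread can be completely starved of issue-queue
// slots, but most of the capacity is still shared.
//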
  227. void OooCore::init_generic() {
  228. reset();
  229. }
  230. template <typename T>
  231. static void OOO_CORE_MODEL::print_list_of_state_lists(ostream& os, const ListOfStateLists& lol, const char* title) {
  232. os << title, ":", endl;
  233. foreach (i, lol.count) {
  234. StateList& list = *lol[i];
  235. os << list.name, " (", list.count, " entries):", endl;
  236. int n = 0;
  237. T* obj;
  238. foreach_list_mutable(list, obj, entry, nextentry) {
  239. if ((n % 16) == 0) os << " ";
  240. os << " ", intstring(obj->index(), -3);
  241. if (((n % 16) == 15) || (n == list.count-1)) os << endl;
  242. n++;
  243. }
  244. assert(n == list.count);
  245. os << endl;
  246. }
  247. }
  248. void PhysicalRegisterFile::init(const char* name, W8 coreid, int rfid, int size, OooCore* core) {
  249. assert(rfid < PHYS_REG_FILE_COUNT);
  250. assert(size <= MAX_PHYS_REG_FILE_SIZE);
  251. this->size = size;
  252. this->coreid = coreid;
  253. this->core = core;
  254. this->rfid = rfid;
  255. this->name = name;
  256. this->allocations = 0;
  257. this->frees = 0;
  258. foreach (i, MAX_PHYSREG_STATE) {
  259. stringbuf sb;
  260. sb << name, "-", physreg_state_names[i];
  261. states[i].init(sb, getcore().physreg_states);
  262. }
  263. foreach (i, size) {
  264. (*this)[i].init(coreid, rfid, i, core);
  265. }
  266. }
  267. PhysicalRegister* PhysicalRegisterFile::alloc(W8 threadid, int r) {
  268. PhysicalRegister* physreg = (PhysicalRegister*)((r == 0) ? &(*this)[r] : states[PHYSREG_FREE].peek());
  269. if unlikely (!physreg) return NULL;
  270. physreg->changestate(PHYSREG_WAITING);
  271. physreg->flags = FLAG_WAIT;
  272. physreg->threadid = threadid;
  273. allocations++;
  274. assert(states[PHYSREG_FREE].count >= 0);
  275. return physreg;
  276. }
  277. ostream& PhysicalRegisterFile::print(ostream& os) const {
  278. os << "PhysicalRegisterFile<", name, ", rfid ", rfid, ", size ", size, ">:", endl;
  279. foreach (i, size) {
  280. os << (*this)[i], endl;
  281. }
  282. return os;
  283. }
  284. void PhysicalRegisterFile::reset(W8 threadid) {
  285. foreach (i, size) {
  286. if ((*this)[i].threadid == threadid) {
  287. (*this)[i].reset(threadid);
  288. }
  289. }
  290. }
  291. bool PhysicalRegisterFile::cleanup() {
  292. int freed = 0;
  293. PhysicalRegister* physreg;
  294. StateList& statelist = this->states[PHYSREG_PENDINGFREE];
  295. foreach_list_mutable(statelist, physreg, entry, nextentry) {
  296. if unlikely (!physreg->referenced()) {
  297. physreg->free();
  298. freed++;
  299. }
  300. }
  301. CORE_DEF_STATS(commit.free_reg_recycled) += freed;
  302. return (freed > 0);
  303. }
  304. void PhysicalRegisterFile::reset() {
  305. foreach (i, MAX_PHYSREG_STATE) {
  306. states[i].reset();
  307. }
  308. foreach (i, size) {
  309. (*this)[i].reset(0, false);
  310. }
  311. }
  312. StateList& PhysicalRegister::get_state_list(int s) const {
  313. return core->physregfiles[rfid].states[s];
  314. }
  315. namespace OOO_CORE_MODEL {
  316. ostream& operator <<(ostream& os, const PhysicalRegister& physreg) {
  317. stringbuf sb;
  318. print_value_and_flags(sb, physreg.data, physreg.flags);
  319. os << "TH ", physreg.threadid, " rfid ", physreg.rfid;
  320. os << " r", intstring(physreg.index(), -3), " state ", padstring(physreg.get_state_list().name, -12), " ", sb;
  321. if (physreg.rob) os << " rob ", physreg.rob->index(), " (uuid ", physreg.rob->uop.uuid, ")";
  322. os << " refcount ", physreg.refcount;
  323. return os;
  324. }
  325. };
  326. ostream& RegisterRenameTable::print(ostream& os) const {
  327. foreach (i, TRANSREG_COUNT) {
  328. if ((i % 8) == 0) os << " ";
  329. os << " ", padstring(arch_reg_names[i], -6), " r", intstring((*this)[i]->index(), -3), " | ";
  330. if (((i % 8) == 7) || (i == TRANSREG_COUNT-1)) os << endl;
  331. }
  332. return os;
  333. }
  334. //
  335. // Get the thread priority, with lower numbers receiving higher priority.
  336. // This is used to regulate the order in which fetch, rename, frontend
  337. // and dispatch slots are filled in each cycle.
  338. //
  339. // The well-known ICOUNT algorithm adds up the number of uops in
  340. // the frontend pipeline stages and gives highest priority to
  341. // the thread with the lowest number, since this thread is moving
  342. // uops through very quickly and can make more progress.
  343. //
  344. int ThreadContext::get_priority() const {
  345. int priority =
  346. fetchq.count +
  347. rob_frontend_list.count +
  348. rob_ready_to_dispatch_list.count;
  349. for_each_cluster (cluster) {
  350. priority +=
  351. rob_dispatched_list[cluster].count +
  352. rob_ready_to_issue_list[cluster].count +
  353. rob_ready_to_store_list[cluster].count +
  354. rob_ready_to_load_list[cluster].count;
  355. }
  356. return priority;
  357. }
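//
// Editor's note (worked example with made-up occupancies): suppose thread A
// has fetchq = 10, frontend = 6, ready-to-dispatch = 2 and one cluster with
// 4 uops in flight, giving priority 22, while thread B has 2 + 1 + 0 + 1 = 4.
// Thread B's lower ICOUNT wins, so B is given fetch/rename slots first this
// cycle, on the theory that it is draining the pipeline quickly.
//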
  358. //
  359. // Execute one cycle of the entire core state machine
  360. //
  361. bool OooCore::runcycle() {
  362. bool exiting = 0;
  363. //
  364. // Detect edge triggered transition from 0->1 for
  365. // pending interrupt events, then wait for current
  366. // x86 insn EOM uop to commit before redirecting
  367. // to the interrupt handler.
  368. //
  369. foreach (i, threadcount) {
  370. ThreadContext* thread = threads[i];
  371. bool current_interrupts_pending = thread->ctx.check_events();
  372. thread->handle_interrupt_at_next_eom = current_interrupts_pending;
  373. thread->prev_interrupts_pending = current_interrupts_pending;
  374. if(thread->ctx.kernel_mode) {
  375. thread->thread_stats.set_default_stats(kernel_stats);
  376. } else {
  377. thread->thread_stats.set_default_stats(user_stats);
  378. }
  379. }
  380. // Each core's thread-shared stats counters will be added to
  381. // thread 0's counters for simplicity
  382. set_default_stats(threads[0]->thread_stats.get_default_stats(), false);
  383. //
  384. // Compute reserved issue queue entries to avoid starvation:
  385. //
  386. #ifdef ENABLE_CHECKS_IQ
  387. // at any cycle, for any issuq, total free entries == shared_free_entries + total_issueq_reserved_free
  388. MYDEBUG << " enable_checks_IQ : core[", coreid,"]:",endl;
  389. #ifndef MULTI_IQ
  390. int total_issueq_count = 0;
  391. int total_issueq_reserved_free = 0;
  392. int reserved_iq_entries_per_thread = reserved_iq_entries / threadcount;
  393. foreach (i, threadcount) {
  394. ThreadContext* thread = threads[i];
  395. assert(thread);
  396. stats = thread->stats_;
  397. total_issueq_count += thread->issueq_count;
  398. if(thread->issueq_count < reserved_iq_entries_per_thread){
  399. total_issueq_reserved_free += reserved_iq_entries_per_thread - thread->issueq_count;
  400. }
  401. }
  402. MYDEBUG << " ISSUE_QUEUE_SIZE ", ISSUE_QUEUE_SIZE, " issueq_all.count ", issueq_all.count, " issueq_all.shared_free_entries ",
  403. issueq_all.shared_free_entries, " total_issueq_reserved_free ", total_issueq_reserved_free,
  404. " reserved_iq_entries ", reserved_iq_entries, " total_issueq_count ", total_issueq_count, endl;
  405. assert (total_issueq_count == issueq_all.count);
  406. assert((ISSUE_QUEUE_SIZE - issueq_all.count) == (issueq_all.shared_free_entries + total_issueq_reserved_free));
  407. #else
  408. foreach(cluster, 4){
  409. int total_issueq_count = 0;
  410. int total_issueq_reserved_free = 0;
  411. int reserved_iq_entries_per_thread = reserved_iq_entries[cluster] / threadcount;
  412. foreach (i, threadcount) {
  413. ThreadContext* thread = threads[i];
  414. assert(thread);
  415. stats = thread->stats_;
  416. MYDEBUG << " TH[", thread->threadid, "] issueq_count[", cluster, "] ", thread->issueq_count[cluster], endl;
  417. assert(thread->issueq_count[cluster] >=0);
  418. total_issueq_count += thread->issueq_count[cluster];
  419. if(thread->issueq_count[cluster] < reserved_iq_entries_per_thread){
  420. total_issueq_reserved_free += reserved_iq_entries_per_thread - thread->issueq_count[cluster];
  421. }
  422. }
  423. int issueq_count = 0;
  424. issueq_operation_on_cluster_with_result((*this), cluster, issueq_count, count);
  425. int issueq_shared_free_entries = 0;
  426. issueq_operation_on_cluster_with_result((*this), cluster, issueq_shared_free_entries, shared_free_entries);
  427. MYDEBUG << " cluster[", cluster, "] ISSUE_QUEUE_SIZE ", ISSUE_QUEUE_SIZE, " issueq[" , cluster, "].count ", issueq_count, " issueq[" , cluster, "].shared_free_entries ",
  428. issueq_shared_free_entries, " total_issueq_reserved_free ", total_issueq_reserved_free,
  429. " reserved_iq_entries ", reserved_iq_entries[cluster], " total_issueq_count ", total_issueq_count, endl;
  430. assert (total_issueq_count == issueq_count);
  431. assert((ISSUE_QUEUE_SIZE - issueq_count) == (issueq_shared_free_entries + total_issueq_reserved_free));
  432. }
  433. #endif
  434. #endif
  435. foreach (i, threadcount) threads[i]->loads_in_this_cycle = 0;
  436. fu_avail = bitmask(FU_COUNT);
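// Editor's note: bitmask(FU_COUNT) is assumed to yield FU_COUNT low-order
// bits set (e.g. FU_COUNT = 8 -> 0xFF), i.e. every functional unit is marked
// free at the start of the cycle and bits are cleared as units are claimed.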
  437. //
  438. // Backend and issue pipe stages run with round robin priority
  439. //
  440. int commitrc[threadcount];
  441. commitcount = 0;
  442. writecount = 0;
  443. if (logable(9)) {
  444. ptl_logfile << "OooCore::run():thread-commit\n";
  445. }
  446. foreach (permute, threadcount) {
  447. int tid = add_index_modulo(round_robin_tid, +permute, threadcount);
  448. ThreadContext* thread = threads[tid];
  449. if unlikely (!thread->ctx.running) continue;
  450. if (thread->pause_counter > 0) {
  451. thread->pause_counter--;
  452. if(thread->handle_interrupt_at_next_eom) {
  453. commitrc[tid] = COMMIT_RESULT_INTERRUPT;
  454. if(thread->ctx.is_int_pending()) {
  455. thread->thread_stats.cycles_in_pause -=
  456. thread->pause_counter;
  457. thread->pause_counter = 0;
  458. }
  459. } else {
  460. commitrc[tid] = COMMIT_RESULT_OK;
  461. }
  462. continue;
  463. }
  464. commitrc[tid] = thread->commit();
  465. for_each_cluster(j) thread->writeback(j);
  466. for_each_cluster(j) thread->transfer(j);
  467. }
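//
// Editor's note (sketch, not the actual PTLsim helper): add_index_modulo()
// above is assumed to behave like a wrapping index increment, e.g.
//
//   int add_index_modulo(int base, int offset, int count) {
//       return (base + offset + count) % count;   // stays within [0, count)
//   }
//
// Combined with the "+1" advance of round_robin_tid at the end of runcycle(),
// each thread periodically gets first claim on commit/issue/dispatch bandwidth.
//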
  468. if (logable(100)) {
  469. ptl_logfile << "OooCore::run():context after commit\n";
  470. ptl_logfile << flush;
  471. foreach(x, threadcount) {
  472. ptl_logfile << threads[x]->ctx, endl;
  473. }
  474. }
  475. //
  476. // Clock the TLB miss page table walk state machine
  477. // This may use up load ports, so do it before other
  478. // loads can issue
  479. //
  480. foreach (permute, threadcount) {
  481. int tid = add_index_modulo(round_robin_tid, +permute, threadcount);
  482. ThreadContext* thread = threads[tid];
  483. thread->tlbwalk();
  484. }
  485. /* svn 225
  486. foreach (i, threadcount) {
  487. threads[i]->tlbwalk();
  488. }
  489. */
  490. //
  491. // Issue whatever is ready
  492. //
  493. if (logable(9)) {
  494. ptl_logfile << "OooCore::run():issue\n";
  495. }
  496. for_each_cluster(i) { issue(i); }
  497. //
  498. // Most of the frontend (except fetch!) also works with round robin priority
  499. //
  500. if (logable(9)) {
  501. ptl_logfile << "OooCore::run():dispatch\n";
  502. }
  503. int dispatchrc[threadcount];
  504. dispatchcount = 0;
  505. foreach (permute, threadcount) {
  506. int tid = add_index_modulo(round_robin_tid, +permute, threadcount);
  507. ThreadContext* thread = threads[tid];
  508. if unlikely (!thread->ctx.running) continue;
  509. for_each_cluster(j) { thread->complete(j); }
  510. dispatchrc[tid] = thread->dispatch();
  511. if likely (dispatchrc[tid] >= 0) {
  512. thread->frontend();
  513. thread->rename();
  514. }
  515. }
  516. //
  517. // Compute fetch priorities (default is ICOUNT algorithm)
  518. //
  519. // This means we sort in ascending order, with any unused
  520. // threads given the lowest priority.
  521. //
  522. if (logable(9)) {
  523. ptl_logfile << "OooCore::run():fetch\n";
  524. }
  525. int priority_value[threadcount];
  526. int priority_index[threadcount];
  527. if likely (threadcount == 1) {
  528. priority_value[0] = 0;
  529. priority_index[0] = 0;
  530. } else {
  531. foreach (i, threadcount) {
  532. priority_index[i] = i;
  533. ThreadContext* thread = threads[i];
  534. priority_value[i] = thread->get_priority();
  535. if unlikely (!thread->ctx.running) priority_value[i] = limits<int>::max;
  536. }
  537. sort(priority_index, threadcount, SortPrecomputedIndexListComparator<int, false>(priority_value));
  538. }
  539. //
  540. // Fetch in thread priority order
  541. //
  542. // NOTE: True ICOUNT only fetches the highest priority
  543. // thread per cycle, since there is usually only one
  544. // instruction cache port. In a banked i-cache, we can
  545. // fetch from multiple threads every cycle.
  546. //
  547. bool fetch_exception[threadcount];
  548. foreach (j, threadcount) {
  549. int i = priority_index[j];
  550. ThreadContext* thread = threads[i];
  551. assert(thread);
  552. fetch_exception[i] = true;
  553. if unlikely (!thread->ctx.running) {
  554. continue;
  555. }
  556. if likely (dispatchrc[i] >= 0) {
  557. fetch_exception[i] = thread->fetch();
  558. }
  559. }
  560. //
  561. // Always clock the issue queues: they're independent of all threads
  562. //
  563. foreach_issueq(clock());
  564. //
  565. // Advance the round robin priority index
  566. //
  567. round_robin_tid = add_index_modulo(round_robin_tid, +1, threadcount);
  568. #ifdef ENABLE_CHECKS
  569. // This significantly slows down simulation; only enable it if absolutely needed:
  570. // check_refcounts();
  571. #endif
  572. foreach (i, threadcount) {
  573. ThreadContext* thread = threads[i];
  574. if unlikely (!thread->ctx.running) continue;
  575. int rc = commitrc[i];
  576. if (logable(9)) {
  577. ptl_logfile << "OooCore::run():result check thread[",
  578. i, "] rc[", rc, "]\n";
  579. }
  580. if likely ((rc == COMMIT_RESULT_OK) | (rc == COMMIT_RESULT_NONE)) {
  581. if(fetch_exception[i])
  582. continue;
  583. // It's an instruction page fault
  584. rc = COMMIT_RESULT_EXCEPTION;
  585. thread->ctx.exception = EXCEPTION_PageFaultOnExec;
  586. thread->ctx.page_fault_addr = thread->ctx.exec_fault_addr;
  587. }
  588. switch (rc) {
  589. case COMMIT_RESULT_SMC:
  590. {
  591. if (logable(3)) ptl_logfile << "Potentially cross-modifying SMC detected: global flush required (cycle ", sim_cycle, ", ", total_user_insns_committed, " commits)", endl, flush;
  592. //
  593. // DO NOT GLOBALLY FLUSH! It will cut off the other thread(s) in the
  594. // middle of their currently committing x86 instruction, causing massive
  595. // internal corruption on any VCPUs that happen to be straddling the
  596. // instruction boundary.
  597. //
  598. // BAD: machine.flush_all_pipelines();
  599. //
  600. // This is a temporary fix: in the *extremely* rare case where both
  601. // threads have the same basic block in their pipelines and that
  602. // BB is being invalidated, the BB cache will forbid us from
  603. // freeing it (and will print a warning to that effect).
  604. //
  605. // I'm working on a solution to this, to put some BBs on an
  606. // "invisible" list, where they cannot be looked up anymore,
  607. // but their memory is not freed until the lock is released.
  608. //
  609. foreach (i, threadcount) {
  610. ThreadContext* t = threads[i];
  611. if unlikely (!t) continue;
  612. if (logable(3)) {
  613. ptl_logfile << " [vcpu ", i, "] current_basic_block = ", t->current_basic_block, ": ";
  614. if (t->current_basic_block) ptl_logfile << t->current_basic_block->rip;
  615. ptl_logfile << endl;
  616. }
  617. }
  618. thread->flush_pipeline();
  619. thread->invalidate_smc();
  620. break;
  621. }
  622. case COMMIT_RESULT_EXCEPTION:
  623. {
  624. if (logable(3) && thread->current_basic_block &&
  625. thread->current_basic_block->rip) {
  626. ptl_logfile << " [vcpu ", thread->ctx.cpu_index, "] in exception handling at rip ", thread->current_basic_block->rip, endl, flush;
  627. }
  628. exiting = !thread->handle_exception();
  629. break;
  630. }
  631. case COMMIT_RESULT_BARRIER:
  632. {
  633. if (logable(3) && thread->current_basic_block &&
  634. thread->current_basic_block->rip) {
  635. ptl_logfile << " [vcpu ", thread->ctx.cpu_index, "] in barrier handling at rip ", thread->current_basic_block->rip, endl, flush;
  636. }
  637. exiting = !thread->handle_barrier();
  638. break;
  639. }
  640. case COMMIT_RESULT_INTERRUPT:
  641. {
  642. if (logable(3) && thread->current_basic_block &&
  643. thread->current_basic_block->rip) {
  644. ptl_logfile << " [vcpu ", thread->ctx.cpu_index, "] in interrupt handling at rip ", thread->current_basic_block->rip, endl, flush;
  645. }
  646. exiting = 1;
  647. thread->handle_interrupt();
  648. break;
  649. }
  650. case COMMIT_RESULT_STOP:
  651. {
  652. if (logable(3)) ptl_logfile << " COMMIT_RESULT_STOP, flush_pipeline().",endl;
  653. thread->flush_pipeline();
  654. thread->stall_frontend = 1;
  655. // machine.stopped[thread->ctx.cpu_index] = 1;
  656. // Wait for other cores to sync up, so don't exit right away
  657. break;
  658. }
  659. }
  660. if(exiting)
  661. machine.ret_qemu_env = &thread->ctx;
  662. }
  663. // return false;
  664. // if unlikely (vcpu_online_map_changed) {
  665. // vcpu_online_map_changed = 0;
  666. // foreach (i, contextcount) {
  667. // Context& vctx = contextof(i);
  668. // if likely (!vctx.dirty) continue;
  669. // //
  670. // // The VCPU is coming up for the first time after booting or being
  671. // // taken offline by the user.
  672. // //
  673. // // Force the active core model to flush any cached (uninitialized)
  674. // // internal state (like register file copies) it might have, since
  675. // // it did not know anything about this VCPU prior to now: if it
  676. // // suddenly gets marked as running without this, the core model
  677. // // will try to execute from bogus state data.
  678. // //
  679. // ptl_logfile << "VCPU ", vctx.cpu_index, " context was dirty: update core model internal state", endl;
  680. //
  681. // ThreadContext* tc = threads[vctx.cpu_index];
  682. // assert(tc);
  683. // assert(&tc->ctx == &vctx);
  684. // tc->flush_pipeline();
  685. // vctx.dirty = 0;
  686. // }
  687. // }
  688. foreach (i, threadcount) {
  689. ThreadContext* thread = threads[i];
  690. if (logable(9)) {
  691. stringbuf sb;
  692. sb << "[vcpu ", thread->ctx.cpu_index, "] thread ", thread->threadid, ": WARNING: At cycle ",
  693. sim_cycle, ", ", total_user_insns_committed, " user commits: ",
  694. (sim_cycle - thread->last_commit_at_cycle), " cycles;", endl;
  695. ptl_logfile << sb, flush;
  696. }
  697. }
  698. foreach (i, threadcount) {
  699. ThreadContext* thread = threads[i];
  700. if unlikely (!thread->ctx.running) break;
  701. if unlikely ((sim_cycle - thread->last_commit_at_cycle) > 1024*1024*threadcount) {
  702. stringbuf sb;
  703. sb << "[vcpu ", thread->ctx.cpu_index, "] thread ", thread->threadid, ": WARNING: At cycle ",
  704. sim_cycle, ", ", total_user_insns_committed, " user commits: no instructions have committed for ",
  705. (sim_cycle - thread->last_commit_at_cycle), " cycles; the pipeline could be deadlocked", endl;
  706. ptl_logfile << sb, flush;
  707. cerr << sb, flush;
  708. machine.dump_state(ptl_logfile);
  709. ptl_logfile.flush();
  710. exiting = 1;
  711. assert(0);
  712. assert_fail(__STRING(0), __FILE__, __LINE__, __PRETTY_FUNCTION__);
  713. }
  714. }
  715. core_stats.cycles++;
  716. return exiting;
  717. }
  718. //
  719. // ReorderBufferEntry
  720. //
  721. void ReorderBufferEntry::init(int idx) {
  722. this->idx = idx;
  723. entry_valid = 0;
  724. selfqueuelink::reset();
  725. current_state_list = NULL;
  726. reset();
  727. }
  728. //
  729. // Clean out various fields from the ROB entry that are
  730. // expected to be zero when allocating a new ROB entry.
  731. //
  732. void ReorderBufferEntry::reset() {
  733. int latency, operand;
  734. // Deallocate ROB entry
  735. entry_valid = false;
  736. cycles_left = 0;
  737. uop.uuid = -1;
  738. physreg = (PhysicalRegister*)NULL;
  739. lfrqslot = -1;
  740. lsq = 0;
  741. load_store_second_phase = 0;
  742. lock_acquired = 0;
  743. consumer_count = 0;
  744. executable_on_cluster_mask = 0;
  745. pteupdate = 0;
  746. cluster = -1;
  747. #ifdef ENABLE_TRANSIENT_VALUE_TRACKING
  748. dest_renamed_before_writeback = 0;
  749. no_branches_between_renamings = 0;
  750. #endif
  751. issued = 0;
  752. generated_addr = original_addr = cache_data = 0;
  753. annul_flag = 0;
  754. }
  755. bool ReorderBufferEntry::ready_to_issue() const {
  756. bool raready = operands[0]->ready();
  757. bool rbready = operands[1]->ready();
  758. bool rcready = operands[2]->ready();
  759. bool rsready = operands[3]->ready();
  760. if (isstore(uop.opcode)) {
  761. return (load_store_second_phase) ? (raready & rbready & rcready & rsready) : (raready & rbready);
  762. } else if (isload(uop.opcode)) {
  763. return (load_store_second_phase) ? (raready & rbready & rcready & rsready) : (raready & rbready & rcready);
  764. } else {
  765. return (raready & rbready & rcready & rsready);
  766. }
  767. }
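//
// Editor's note (summary of the readiness rules above; ra..rs are
// operands[0..3]):
//
//   uop type   first phase waits on    second phase waits on
//   store      ra, rb                  ra, rb, rc, rs
//   load       ra, rb, rc              ra, rb, rc, rs
//   other      ra, rb, rc, rs          (single phase)
//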
  768. bool ReorderBufferEntry::ready_to_commit() const {
  769. return (current_state_list == &getthread().rob_ready_to_commit_queue);
  770. }
  771. StateList& ReorderBufferEntry::get_ready_to_issue_list() {
  772. OooCore& core = getcore();
  773. ThreadContext& thread = getthread();
  774. return
  775. isload(uop.opcode) ? thread.rob_ready_to_load_list[cluster] :
  776. isstore(uop.opcode) ? thread.rob_ready_to_store_list[cluster] :
  777. thread.rob_ready_to_issue_list[cluster];
  778. }
  779. //
  780. // Reorder Buffer
  781. //
  782. stringbuf& ReorderBufferEntry::get_operand_info(stringbuf& sb, int operand) const {
  783. PhysicalRegister& physreg = *operands[operand];
  784. ReorderBufferEntry& sourcerob = *physreg.rob;
  785. sb << "r", physreg.index();
  786. if (PHYS_REG_FILE_COUNT > 1) sb << "@", getcore().physregfiles[physreg.rfid].name;
  787. switch (physreg.state) {
  788. case PHYSREG_WRITTEN:
  789. sb << " (written)"; break;
  790. case PHYSREG_BYPASS:
  791. sb << " (ready)"; break;
  792. case PHYSREG_WAITING:
  793. sb << " (wait rob ", sourcerob.index(), " uuid ", sourcerob.uop.uuid, ")"; break;
  794. case PHYSREG_ARCH:
  795. if (physreg.index() == PHYS_REG_NULL) sb << " (zero)"; else sb << " (arch ", arch_reg_names[physreg.archreg], ")"; break;
  796. case PHYSREG_PENDINGFREE:
  797. sb << " (pending free for ", arch_reg_names[physreg.archreg], ")"; break;
  798. default:
  799. // Cannot be in free state!
  800. sb << " (FREE)"; break;
  801. }
  802. return sb;
  803. }
  804. ThreadContext& ReorderBufferEntry::getthread() const { return *core->threads[threadid]; }
  805. issueq_tag_t ReorderBufferEntry::get_tag() {
  806. int mask = ((1 << MAX_THREADS_BIT) - 1) << MAX_ROB_IDX_BIT;
  807. if (logable(100)) ptl_logfile << " get_tag() thread ", hexstring(threadid, 8), " rob idx ", hexstring(idx, 16), " mask ", hexstring(mask, 32), endl;
  808. assert(!(idx & mask));
  809. assert(!(threadid >> MAX_THREADS_BIT));
  810. issueq_tag_t rc = (idx | (threadid << MAX_ROB_IDX_BIT));
  811. if (logable(100)) ptl_logfile << " tag ", hexstring(rc, 16), endl;
  812. return rc;
  813. }
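//
// Editor's note (worked example with hypothetical widths): the tag packed
// above is (threadid << MAX_ROB_IDX_BIT) | idx. With MAX_ROB_IDX_BIT = 7 and
// MAX_THREADS_BIT = 2, thread 2 / ROB slot 5 encodes as (2 << 7) | 5 = 0x105,
// and the issue queue can recover both fields with:
//
//   int rob_idx  = tag & ((1 << MAX_ROB_IDX_BIT) - 1);   // -> 5
//   int threadid = tag >> MAX_ROB_IDX_BIT;               // -> 2
//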
  814. ostream& ReorderBufferEntry::print_operand_info(ostream& os, int operand) {
  815. stringbuf sb;
  816. get_operand_info(sb, operand);
  817. os << sb;
  818. return os;
  819. }
  820. ostream& ReorderBufferEntry::print(ostream& os) const {
  821. stringbuf name, rainfo, rbinfo, rcinfo;
  822. nameof(name, uop);
  823. get_operand_info(rainfo, 0);
  824. get_operand_info(rbinfo, 1);
  825. get_operand_info(rcinfo, 2);
  826. if(!current_state_list || !physreg){
  827. os << " rob ", intstring(index(), -3), " uuid ", intstring(uop.uuid, 16), " is not valid. ";
  828. return os;
  829. }
  830. os << "rob ", intstring(index(), -3), " uuid ", intstring(uop.uuid, 16), " rip 0x", hexstring(uop.rip, 48), " ",
  831. padstring(current_state_list->name, -24), " ", (uop.som ? "SOM" : " "), " ", (uop.eom ? "EOM" : " "),
  832. " @ ", padstring((cluster >= 0) ? clusters[cluster].name : "???", -4), " ",
  833. padstring(name, -12), " r", intstring(physreg->index(), -3), " ", padstring(arch_reg_names[uop.rd], -6);
  834. if (isload(uop.opcode)){
  835. if(lsq) os << " ld", intstring(lsq->index(), -3);
  836. }else if (isstore(uop.opcode)){
  837. if(lsq) os << " st", intstring(lsq->index(), -3);
  838. }else os << " ";
  839. os << " = ";
  840. os << padstring(rainfo, -30);
  841. os << padstring(rbinfo, -30);
  842. os << padstring(rcinfo, -30);
  843. return os;
  844. }
  845. void ThreadContext::print_rob(ostream& os) {
  846. os << "ROB head ", ROB.head, " to tail ", ROB.tail, " (", ROB.count, " entries):", endl;
  847. foreach_forward(ROB, i) {
  848. ReorderBufferEntry& rob = ROB[i];
  849. rob.print(os);
  850. os << endl;
  851. // os << " " << rob, endl;
  852. }
  853. }
  854. void ThreadContext::print_lsq(ostream& os) {
  855. os << "LSQ head ", LSQ.head, " to tail ", LSQ.tail, " (", LSQ.count, " entries):", endl, flush;
  856. foreach_forward(LSQ, i) {
  857. assert(i < LSQ_SIZE);
  858. LoadStoreQueueEntry& lsq = LSQ[i];
  859. os << " ", lsq, endl;
  860. }
  861. }
  862. void ThreadContext::print_rename_tables(ostream& os) {
  863. os << "SpecRRT:", endl;
  864. os << specrrt;
  865. os << "CommitRRT:", endl;
  866. os << commitrrt;
  867. }
  868. void OooCore::print_smt_state(ostream& os) {
  869. os << "Print SMT statistics:", endl;
  870. foreach (i, threadcount) {
  871. ThreadContext* thread = threads[i];
  872. os << "Thread ", i, ":", endl,
  873. " total_uops_committed ", thread->total_uops_committed, " iterations ", iterations, endl,
  874. " uipc ", double(thread->total_uops_committed) / double(iterations), endl,
  875. " total_insns_committed ", thread->total_insns_committed, " iterations ", iterations, endl,
  876. " ipc ", double(thread->total_insns_committed) / double(iterations), endl;
  877. }
  878. }
  879. void ThreadContext::dump_smt_state(ostream& os) {
  880. os << "SMT per-thread state for t", threadid, ":", endl;
  881. os << "Fetchrip: ", hexstring(fetchrip, 64), endl;
  882. print_rename_tables(os);
  883. print_rob(os);
  884. print_lsq(os);
  885. os << "ITLB: \n", itlb, endl;
  886. os << "DTLB: \n", dtlb, endl;
  887. os << flush;
  888. }
  889. void OooCore::dump_state(ostream& os) {
  890. os << "dump_state for core[",coreid,"]: SMT common structures:", endl;
  891. print_list_of_state_lists<PhysicalRegister>(os, physreg_states, "Physical register states");
  892. foreach (i, PHYS_REG_FILE_COUNT) {
  893. os << physregfiles[i];
  894. }
  895. print_list_of_state_lists<ReorderBufferEntry>(os, rob_states, "ROB entry states");
  896. os << "Issue Queues:", endl;
  897. foreach_issueq(print(os));
  898. // caches.print(os);
  899. os << "Unaligned predictor:", endl;
  900. os << " ", unaligned_predictor.popcount(), " unaligned bits out of ", UNALIGNED_PREDICTOR_SIZE, " bits", endl;
  901. os << " Raw data: ", unaligned_predictor, endl;
  902. foreach (i, threadcount) {
  903. ThreadContext* thread = threads[i];
  904. thread->dump_smt_state(os);
  905. }
  906. }
  907. //
  908. // Validate the physical register reference counters against what
  909. // is really accessible from the various tables and operand fields.
  910. //
  911. // This is for debugging only.
  912. //
  913. void OooCore::check_refcounts() {
  914. // this should be for each thread instead of whole core:
  915. // for now, we just work on thread[0];
  916. ThreadContext& thread = *threads[0];
  917. Queue<ReorderBufferEntry, ROB_SIZE>& ROB = thread.ROB;
  918. RegisterRenameTable& specrrt = thread.specrrt;
  919. RegisterRenameTable& commitrrt = thread.commitrrt;
  920. int refcounts[PHYS_REG_FILE_COUNT][MAX_PHYS_REG_FILE_SIZE];
  921. memset(refcounts, 0, sizeof(refcounts));
  922. foreach (rfid, PHYS_REG_FILE_COUNT) {
  923. // Null physreg in each register file is special and can never be freed:
  924. refcounts[rfid][PHYS_REG_NULL]++;
  925. }
  926. foreach_forward(ROB, i) {
  927. ReorderBufferEntry& rob = ROB[i];
  928. foreach (j, MAX_OPERANDS) {
  929. refcounts[rob.operands[j]->rfid][rob.operands[j]->index()]++;
  930. }
  931. }
  932. foreach (i, TRANSREG_COUNT) {
  933. refcounts[commitrrt[i]->rfid][commitrrt[i]->index()]++;
  934. refcounts[specrrt[i]->rfid][specrrt[i]->index()]++;
  935. }
  936. bool errors = 0;
  937. foreach (rfid, PHYS_REG_FILE_COUNT) {
  938. PhysicalRegisterFile& physregs = physregfiles[rfid];
  939. foreach (i, physregs.size) {
  940. if unlikely (physregs[i].refcount != refcounts[rfid][i]) {
  941. ptl_logfile << "ERROR: r", i, " refcount is ", physregs[i].refcount, " but should be ", refcounts[rfid][i], endl;
  942. foreach_forward(ROB, r) {
  943. ReorderBufferEntry& rob = ROB[r];
  944. foreach (j, MAX_OPERANDS) {
  945. if ((rob.operands[j]->index() == i) & (rob.operands[j]->rfid == rfid)) ptl_logfile << " ROB ", r, " operand ", j, endl;
  946. }
  947. }
  948. foreach (j, TRANSREG_COUNT) {
  949. if ((commitrrt[j]->index() == i) & (commitrrt[j]->rfid == rfid)) ptl_logfile << " CommitRRT ", arch_reg_names[j], endl;
  950. if ((specrrt[j]->index() == i) & (specrrt[j]->rfid == rfid)) ptl_logfile << " SpecRRT ", arch_reg_names[j], endl;
  951. }
  952. errors = 1;
  953. }
  954. }
  955. }
  956. if (errors) assert(false);
  957. }
  958. void OooCore::check_rob() {
  959. // this should be for each thread instead of whole core:
  960. // for now, we just work on thread[0];
  961. ThreadContext& thread = *threads[0];
  962. Queue<ReorderBufferEntry, ROB_SIZE>& ROB = thread.ROB;
  963. foreach (i, ROB_SIZE) {
  964. ReorderBufferEntry& rob = ROB[i];
  965. if (!rob.entry_valid) continue;
  966. assert(inrange((int)rob.forward_cycle, 0, (MAX_FORWARDING_LATENCY+1)-1));
  967. }
  968. foreach (i, threadcount) {
  969. ThreadContext* thread = threads[i];
  970. foreach (i, rob_states.count) {
  971. StateList& list = *(thread->rob_states[i]);
  972. ReorderBufferEntry* rob;
  973. foreach_list_mutable(list, rob, entry, nextentry) {
  974. assert(inrange(rob->index(), 0, ROB_SIZE-1));
  975. assert(rob->current_state_list == &list);
  976. if (!((rob->current_state_list != &thread->rob_free_list) ? rob->entry_valid : (!rob->entry_valid))) {
  977. ptl_logfile << "ROB ", rob->index(), " list = ", rob->current_state_list->name, " entry_valid ", rob->entry_valid, endl, flush;
  978. dump_state(ptl_logfile);
  979. assert(false);
  980. }
  981. }
  982. }
  983. }
  984. }
  985. ostream& LoadStoreQueueEntry::print(ostream& os) const {
  986. os << (store ? "st" : "ld"), intstring(index(), -3), " ";
  987. os << "uuid ", intstring(rob->uop.uuid, 10), " ";
  988. os << "rob ", intstring(rob->index(), -3), " ";
  989. os << "r", intstring(rob->physreg->index(), -3);
  990. if (PHYS_REG_FILE_COUNT > 1) os << "@", core->physregfiles[rob->physreg->rfid].name;
  991. os << " ";
  992. if (invalid) {
  993. os << "< Invalid: fault 0x", hexstring(data, 8), " > ";
  994. } else {
  995. if (datavalid)
  996. os << bytemaskstring((const byte*)&data, bytemask, 8);
  997. else os << "< Data Invalid >";
  998. os << " @ ";
  999. if (addrvalid)
  1000. os << "0x", hexstring(physaddr << 3, 48);
  1001. else os << "< Addr Inval >";
  1002. }
  1003. return os;
  1004. }
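//
// Editor's note: the "physaddr << 3" in print() above suggests LSQ physical
// addresses are kept in 8-byte-chunk units (with bytemask selecting bytes
// within the chunk); e.g. a stored physaddr of 0x1234 corresponds to byte
// address 0x1234 << 3 = 0x91A0.
//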
  1005. //
  1006. // Barriers must flush the fetchq and stall the frontend until
  1007. // after the barrier is consumed. Execution resumes at the address
  1008. // in internal register nextrip (rip after the instruction) after
  1009. // handling the barrier in microcode.
  1010. //
  1011. bool ThreadContext::handle_barrier() {
  1012. // Release resources of everything in the pipeline:
  1013. core_to_external_state();
  1014. if(current_basic_block) {
  1015. current_basic_block->release();
  1016. current_basic_block = NULL;
  1017. }
  1018. int assistid = ctx.eip;
  1019. assist_func_t assist = (assist_func_t)(Waddr)assistid_to_func[assistid];
  1020. // Special case for write_cr3 to flush before calling assist
  1021. if(assistid == ASSIST_WRITE_CR3) {
  1022. flush_pipeline();
  1023. }
  1024. if (logable(1)) {
  1025. ptl_logfile << "[vcpu ", ctx.cpu_index, "] Barrier (#", assistid, " -> ", (void*)assist, " ", assist_name(assist), " called from ",
  1026. (RIPVirtPhys(ctx.reg_selfrip).update(ctx)), "; return to ", (void*)(Waddr)ctx.reg_nextrip,
  1027. ") at ", sim_cycle, " cycles, ", total_user_insns_committed, " commits", endl, flush;
  1028. }
  1029. if (logable(6)) ptl_logfile << "Calling assist function at ", (void*)assist, "...", endl, flush;
  1030. thread_stats.assists[assistid]++;
  1031. if (logable(6)) {
  1032. ptl_logfile << "Before assist:", endl, ctx, endl;
  1033. }
  1034. bool flush_required = assist(ctx);
  1035. if (logable(6)) {
  1036. ptl_logfile << "Done with assist", endl;
  1037. ptl_logfile << "New state:", endl;
  1038. ptl_logfile << ctx;
  1039. }
  1040. // Flush again, but restart at possibly modified rip
  1041. if(flush_required) {
  1042. if (logable(6)) ptl_logfile << " handle_barrier, flush_pipeline again.",endl;
  1043. flush_pipeline();
  1044. if(config.checker_enabled) {
  1045. clear_checker();
  1046. }
  1047. } else {
  1048. reset_fetch_unit(ctx.eip);
  1049. }
  1050. return true;
  1051. }
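//
// Editor's note: when a barrier commits, ctx.eip holds the assist index
// rather than a real instruction pointer; assistid_to_func[] maps it back to
// the microcode assist routine, and depending on the assist's return value
// the pipeline is either fully flushed or just restarted at ctx.eip via
// reset_fetch_unit().
//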
  1052. bool ThreadContext::handle_exception() {
  1053. // Release resources of everything in the pipeline:
  1054. core_to_external_state();
  1055. if (logable(3)) ptl_logfile << " handle_exception, flush_pipeline.",endl;
  1056. flush_pipeline();
  1057. if (logable(4)) {
  1058. ptl_logfile << "[vcpu ", ctx.cpu_index, "] Exception ", exception_name(ctx.exception), " called from rip ", (void*)(Waddr)ctx.eip,
  1059. " at ", sim_cycle, " cycles, ", total_user_insns_committed, " commits", endl, flush;
  1060. }
  1061. //
  1062. // CheckFailed and SkipBlock exceptions are raised by the chk uop.
  1063. // This uop is used at the start of microcoded instructions to assert
  1064. // that certain conditions are true so complex corrective actions can
  1065. // be taken if the check fails.
  1066. //
  1067. // SkipBlock is a special case used for checks at the top of REP loops.
  1068. // Specifically, if the %rcx register is zero on entry to the REP, no
  1069. // action at all is to be taken; the rip should simply advance to
  1070. // whatever is in chk_recovery_rip and execution should resume.
  1071. //
  1072. // CheckFailed exceptions usually indicate the processor needs to take
  1073. // evasive action to avoid a user visible exception. For instance,
  1074. // CheckFailed is raised when an inlined floating point operand is
  1075. // denormal or otherwise cannot be handled by inlined fastpath uops,
  1076. // or when some unexpected segmentation or page table conditions
  1077. // arise.
  1078. //
  1079. if (ctx.exception == EXCEPTION_SkipBlock) {
  1080. ctx.eip = chk_recovery_rip;
  1081. if (logable(6)) ptl_logfile << "SkipBlock pseudo-exception: skipping to ", (void*)(Waddr)ctx.eip, endl, flush;
  1082. if (logable(3)) ptl_logfile << " EXCEPTION_SkipBlock, flush_pipeline.",endl;
  1083. flush_pipeline();
  1084. return true;
  1085. }
  1086. //
  1087. // Map PTL internal hardware exceptions to their x86 equivalents,
  1088. // depending on the context. The error_code field should already
  1089. // be filled out.
  1090. //
  1091. // Exceptions not listed here are propagated by microcode
  1092. // rather than the processor itself.
  1093. //
  1094. int write_exception = 0;
  1095. Waddr exception_address = ctx.page_fault_addr;
  1096. switch (ctx.exception) {
  1097. case EXCEPTION_PageFaultOnRead:
  1098. write_exception = 0;
  1099. goto handle_page_fault;
  1100. case EXCEPTION_PageFaultOnWrite:
  1101. write_exception = 1;
  1102. goto handle_page_fault;
  1103. case EXCEPTION_PageFaultOnExec:
  1104. write_exception = 2;
  1105. goto handle_page_fault;
  1106. handle_page_fault:
  1107. {
  1108. if (logable(10))
  1109. ptl_logfile << "Page fault exception address: ",
  1110. hexstring(exception_address, 64),
  1111. " is_write: ", write_exception, endl, ctx, endl;
  1112. assert(ctx.page_fault_addr != 0);
  1113. int old_exception = ctx.exception_index;
  1114. ctx.handle_interrupt = 1;
  1115. ctx.handle_page_fault(exception_address, write_exception);
  1116. // If we return here, it means QEMU has fixed the page fault
  1117. // without causing any CPU faults, so we can clear the pipeline
  1118. // and continue from the current eip
  1119. flush_pipeline();
  1120. ctx.exception = 0;
  1121. ctx.exception_index = old_exception;
  1122. ctx.exception_is_int = 0;
  1123. return true;
  1124. }
  1125. break;
  1126. case EXCEPTION_FloatingPointNotAvailable:
  1127. ctx.exception_index= EXCEPTION_x86_fpu_not_avail; break;
  1128. case EXCEPTION_FloatingPoint:
  1129. ctx.exception_index= EXCEPTION_x86_fpu; break;
  1130. default:
  1131. ptl_logfile << "Unsupported internal exception type ", exception_name(ctx.exception), endl, flush;
  1132. assert(false);
  1133. }
  1134. if (logable(4)) {
  1135. ptl_logfile << ctx;
  1136. }
  1137. // We are not coming back from this call so flush the pipeline
  1138. // and all other things.
  1139. ctx.propagate_x86_exception(ctx.exception_index, ctx.error_code, ctx.page_fault_addr);
  1140. // Flush again, but restart at modified rip
  1141. if (logable(3)) ptl_logfile << " handle_exception, flush_pipeline again.",endl;
  1142. flush_pipeline();
  1143. return true;
  1144. }
  1145. bool ThreadContext::handle_interrupt() {
  1146. // Release resources of everything in the pipeline:
  1147. core_to_external_state();
  1148. if (logable(3)) ptl_logfile << " handle_interrupt, flush_pipeline.",endl;
  1149. if (logable(6)) {
  1150. ptl_logfile << "[vcpu ", threadid, "] interrupts pending at ", sim_cycle, " cycles, ", total_user_insns_committed, " commits", endl, flush;
  1151. ptl_logfile << "Context at interrupt:", endl;
  1152. ptl_logfile << ctx;
  1153. ptl_logfile.flush();
  1154. }
  1155. ctx.event_upcall();
  1156. if (logable(6)) {
  1157. ptl_logfile << "[vcpu ", threadid, "] after interrupt redirect:", endl;
  1158. ptl_logfile << ctx;
  1159. ptl_logfile.flush();
  1160. }
  1161. // Flush again, but restart at modified rip
  1162. if (logable(3)) ptl_logfile << " handle_interrupt, flush_pipeline again.",endl;
  1163. // update the stats
  1164. if(ctx.exit_request) {
  1165. thread_stats.cpu_exit_requests++;
  1166. } else {
  1167. thread_stats.interrupt_requests++;
  1168. }
  1169. return true;
  1170. }
  1171. void PhysicalRegister::fill_operand_info(PhysicalRegisterOperandInfo& opinfo) {
  1172. opinfo.physreg = index();
  1173. opinfo.state = state;
  1174. opinfo.rfid = rfid;
  1175. opinfo.archreg = archreg;
  1176. if (rob) {
  1177. opinfo.rob = rob->index();
  1178. opinfo.uuid = rob->uop.uuid;
  1179. }
  1180. }
  1181. ostream& OOO_CORE_MODEL::operator <<(ostream& os, const PhysicalRegisterOperandInfo& opinfo) {
  1182. os << "[r", opinfo.physreg, " ", short_physreg_state_names[opinfo.state], " ";
  1183. switch (opinfo.state) {
  1184. case PHYSREG_WAITING:
  1185. case PHYSREG_BYPASS:
  1186. case PHYSREG_WRITTEN:
  1187. os << "rob ", opinfo.rob, " uuid ", opinfo.uuid; break;
  1188. case PHYSREG_ARCH:
  1189. case PHYSREG_PENDINGFREE:
  1190. os << arch_reg_names[opinfo.archreg]; break;
  1191. };
  1192. os << "]";
  1193. return os;
  1194. }
  1195. void OooCore::flush_tlb(Context& ctx) {
  1196. foreach(i, threadcount) {
  1197. threads[i]->dtlb.flush_all();
  1198. threads[i]->itlb.flush_all();
  1199. }
  1200. }
  1201. void OooCore::flush_tlb_virt(Context& ctx, Waddr virtaddr) {
  1202. // FIXME AVADH DEFCORE
  1203. }
  1204. void OooCore::check_ctx_changes()
  1205. {
  1206. foreach(i, threadcount) {
  1207. Context& ctx = threads[i]->ctx;
  1208. ctx.handle_interrupt = 0;
  1209. if(logable(4))
  1210. ptl_logfile << " Ctx[", ctx.cpu_index, "] eflags: ", (void*)ctx.eflags, endl;
  1211. if(ctx.eip != ctx.old_eip) {
  1212. if(logable(5))
  1213. ptl_logfile << "Old_eip: ", (void*)(ctx.old_eip), " New_eip: " ,
  1214. (void*)(ctx.eip), endl;
  1215. // IP address is changed, so flush the pipeline
  1216. threads[i]->flush_pipeline();
  1217. }
  1218. }
  1219. }
  1220. void OooCore::update_stats()
  1221. {
  1222. }
  1223. OooCoreBuilder::OooCoreBuilder(const char* name)
  1224. : CoreBuilder(name)
  1225. {
  1226. }
  1227. BaseCore* OooCoreBuilder::get_new_core(BaseMachine& machine,
  1228. const char* name)
  1229. {
  1230. OooCore* core = new OooCore(machine, 1, name);
  1231. return core;
  1232. }
  1233. namespace OOO_CORE_MODEL {
  1234. OooCoreBuilder defaultCoreBuilder(OOO_CORE_NAME);
  1235. };
  1236. namespace OOO_CORE_MODEL {
  1237. CycleTimer cttotal;
  1238. CycleTimer ctfetch;
  1239. CycleTimer ctdecode;
  1240. CycleTimer ctrename;
  1241. CycleTimer ctfrontend;
  1242. CycleTimer ctdispatch;
  1243. CycleTimer ctissue;
  1244. CycleTimer ctissueload;
  1245. CycleTimer ctissuestore;
  1246. CycleTimer ctcomplete;
  1247. CycleTimer cttransfer;
  1248. CycleTimer ctwriteback;
  1249. CycleTimer ctcommit;
  1250. };