/ptlsim/lib/logic.h
C Header | 1876 lines | 1395 code | 352 blank | 129 comment | 109 complexity | d8ed1b0c58eeb24946ee0035322ef878 MD5 | raw file
Possible License(s): LGPL-2.1, GPL-2.0
- // -*- c++ -*-
- //
- // Sequential Logic Primitives for C++
- //
- // Copyright 1999-2008 Matt T. Yourst <yourst@yourst.com>
- //
- #ifndef _LOGIC_H_
- #define _LOGIC_H_
- #include <globals.h>
- #include <superstl.h>
- inline vec16b x86_sse_ldvbu(const vec16b* m) { vec16b rd; asm("movdqu %[m],%[rd]" : [rd] "=x" (rd) : [m] "xm" (*m)); return rd; }
- inline void x86_sse_stvbu(vec16b* m, const vec16b ra) { asm("movdqu %[ra],%[m]" : [m] "=xm" (*m) : [ra] "x" (ra) : "memory"); }
- inline vec8w x86_sse_ldvwu(const vec8w* m) {
- vec8w rd;
- asm("movdqu %[m],%[rd]" : [rd] "=x" (rd) : [m] "m" (*m));
- return rd;
- }
- //inline vec8w x86_sse_ldvwu(const vec8w* m) { vec8w rd; asm("movdqu %[rd], %[m]" : [rd] "=x" (rd) : [m] "xm" (*m)); return rd; }
- inline void x86_sse_stvwu(vec8w* m, const vec8w ra) { asm("movdqu %[ra],%[m]" : [m] "=m" (*m) : [ra] "x" (ra) : "memory"); }
- extern ofstream ptl_logfile;
- extern ofstream yaml_stats_file;
- template <typename T>
- struct latch {
- T data;
- T newdata;
- latch() {
- reset();
- }
- void reset(const T& d = T()) {
- data = d;
- newdata = d;
- }
- latch(const T& t) { newdata = t; }
- operator T() const { return data; }
- T& operator =(const T& t) {
- newdata = t; return data;
- }
- void clock(bool clkenable = true) {
- if (clkenable)
- data = newdata;
- }
- };
- template <typename T, int size>
- struct SynchronousRegisterFile {
- SynchronousRegisterFile() {
- reset();
- }
- void reset() {
- for (int i = 0; i < size; i++) {
- data[i].data = 0;
- data[i].newdata = 0;
- }
- }
- latch<T> data[size];
- latch<T>& operator [](int i) {
- return data[i];
- }
- void clock(bool clkenable = true) {
- if (!clkenable)
- return;
- for (int i = 0; i < size; i++) {
- data[i].clock();
- }
- }
- };
- //
- // Queue
- //
- // Iterate forward through queue from head to tail
- #define foreach_forward(Q, i) for (int i = (Q).head; i != (Q).tail; i = add_index_modulo(i, +1, (Q).size))
- // Iterate forward through queue from the specified entry until the tail
- #define foreach_forward_from(Q, E, i) for (int i = E->index(); i != (Q).tail; i = add_index_modulo(i, +1, (Q).size))
- // Iterate forward through queue from the entry after the specified entry until the tail
- #define foreach_forward_after(Q, E, i) for (int i = add_index_modulo(E->index(), +1, (Q).size); i != (Q).tail; i = add_index_modulo(i, +1, (Q).size))
- // Iterate backward through queue from tail to head
- #define foreach_backward(Q, i) for (int i = add_index_modulo((Q).tail, -1, (Q).size); i != add_index_modulo((Q).head, -1, (Q).size); i = add_index_modulo(i, -1, (Q).size))
- // Iterate backward through queue from the specified entry until the tail
- #define foreach_backward_from(Q, E, i) for (int i = E->index(); i != add_index_modulo((Q).head, -1, (Q).size); i = add_index_modulo(i, -1, (Q).size))
- // Iterate backward through queue from the entry before the specified entry until the head
- #define foreach_backward_before(Q, E, i) for (int i = add_index_modulo(E->index(), -1, (Q).size); ((i != add_index_modulo((Q).head, -1, (Q).size)) && (E->index() != (Q).head)); i = add_index_modulo(i, -1, (Q).size))
- template <class T, int SIZE>
- struct FixedQueue: public array<T, SIZE> {
- int head; // used for allocation
- int tail; // used for deallocation
- int count; // count of entries
- static const int size = SIZE;
- FixedQueue() {
- reset();
- }
- void flush() {
- head = tail = count = 0;
- }
- void reset() {
- head = tail = count = 0;
- }
- int remaining() const {
- return max((SIZE - count) - 1, 0);
- }
- bool empty() const {
- return (!count);
- }
- bool full() const {
- return (!remaining());
- }
- T* alloc() {
- if (!remaining())
- return NULL;
- T* entry = &(*this)[tail];
- tail = add_index_modulo(tail, +1, SIZE);
- count++;
- return entry;
- }
- T* push() {
- return alloc();
- }
- T* push(const T& data) {
- T* slot = push();
- if (!slot) return NULL;
- *slot = data;
- return slot;
- }
- T* enqueue(const T& data) {
- return push(data);
- }
- void commit(T& entry) {
- assert(entry.index() == head);
- count--;
- head = add_index_modulo(head, +1, SIZE);
- }
- void annul(T& entry) {
- // assert(entry.index() == add_index_modulo(tail, -1, SIZE));
- count--;
- tail = add_index_modulo(tail, -1, SIZE);
- }
- T* pop() {
- if (empty()) return NULL;
- tail = add_index_modulo(tail, -1, SIZE);
- count--;
- return &(*this)[tail];
- }
- T* peek() {
- if (empty())
- return NULL;
- return &(*this)[head];
- }
- T* dequeue() {
- if (empty())
- return NULL;
- count--;
- T* entry = &(*this)[head];
- head = add_index_modulo(head, +1, SIZE);
- return entry;
- }
- void commit(T* entry) { commit(*entry); }
- void annul(T* entry) { annul(*entry); }
- T* pushhead() {
- if (full()) return NULL;
- head = add_index_modulo(head, -1, SIZE);
- count++;
- return &(*this)[head];
- }
- T* pophead() {
- if (empty()) return NULL;
- T* p = &(*this)[head];
- count--;
- head = add_index_modulo(head, +1, SIZE);
- return p;
- }
- T* peekhead() {
- if (empty()) return NULL;
- return &(*this)[head];
- }
- T* peektail() {
- if (empty()) return NULL;
- int t = add_index_modulo(tail, -1, SIZE);
- return &(*this)[t];
- }
- T& operator ()(int index) {
- index = add_index_modulo(head, index, SIZE);
- return (*this)[index];
- }
- const T& operator ()(int index) const {
- index = add_index_modulo(head, index, SIZE);
- return (*this)[index];
- }
- ostream& print(ostream& os) const {
- os << "Queue<", SIZE, ">: head ", head, " to tail ", tail, " (", count, " entries):", endl;
- foreach_forward((*this), i) {
- const T& entry = (*this)[i];
- os << " slot ", intstring(i, 3), ": ", entry, endl;
- }
- return os;
- }
- };
- template <class T, int SIZE>
- ostream& operator <<(ostream& os, FixedQueue<T, SIZE>& queue) {
- return queue.print(os);
- }
- template <class T, int SIZE>
- struct Queue: public FixedQueue<T, SIZE> {
- typedef FixedQueue<T, SIZE> base_t;
- Queue() {
- reset();
- }
- void reset() {
- base_t::reset();
- foreach (i, SIZE) {
- (*this)[i].init(i);
- }
- }
- T* alloc() {
- T* p = base_t::alloc();
- if likely (p) p->validate();
- return p;
- }
- };
- template <class T, int size>
- ostream& operator <<(ostream& os, const Queue<T, size>& queue) {
- os << "Queue<", size, "]: head ", queue.head, " to tail ", queue.tail, " (", queue.count, " entries):", endl;
- foreach_forward(queue, i) {
- const T& entry = queue[i];
- os << " ", entry, endl;
- }
- return os;
- }
- template <typename T, int size>
- struct HistoryBuffer: public array<T, size> {
- int current;
- T prevoldest;
- void reset() {
- current = size-1;
- setzero(this->data);
- }
- HistoryBuffer() {
- reset();
- }
- //
- // Enqueue t at the tail of the queue, making the results
- // visible for possible dequeueing by an earlier pipeline
- // stage within the same cycle (i.e., forwarding is used).
- // If this is not desirable, use enqueuesync() instead.
- //
- void add(const T& t) {
- current = add_index_modulo(current, +1, size);
- prevoldest = this->data[current];
- this->data[current] = t;
- }
- /*
- * Undo last addition
- */
- void undo() {
- this->data[current] = prevoldest;
- current = add_index_modulo(current, -1, size);
- }
- /*
- * Index backwards in time: 0 = most recent addition
- */
- T& operator [](int index) {
- int idx = add_index_modulo(current, -index, size);
- //assert(inrange(idx, 0, size-1));
- return this->data[idx];
- }
- const T& operator [](int index) const {
- int idx = add_index_modulo(current, -index, size);
- //assert(inrange(idx, 0, size-1));
- return this->data[idx];
- }
- };
- template <class T, int size>
- ostream& operator <<(ostream& os, HistoryBuffer<T, size>& history) {
- os << "HistoryBuffer[", size, "]: current = ", history.current, ", prevoldest = ", history.prevoldest, endl;
- for (int i = 0; i < size; i++) {
- os << " ", history[i], endl;
- }
- return os;
- }
- //
- // Fully Associative Arrays
- //
- template <typename T> struct InvalidTag { static const T INVALID; };
- template <> struct InvalidTag<W64> { static const W64 INVALID = 0xffffffffffffffffULL; };
- template <> struct InvalidTag<W32> { static const W32 INVALID = 0xffffffff; };
- template <> struct InvalidTag<W16> { static const W16 INVALID = 0xffff; };
- template <> struct InvalidTag<W8> { static const W8 INVALID = 0xff; };
- //
- // The replacement policy is pseudo-LRU using a most recently used
- // bit vector (mLRU), as described in the paper "Performance Evaluation
- // of Cache Replacement Policies for the SPEC CPU2000 Benchmark Suite"
- // by Al-Zoubi et al. Essentially we maintain one MRU bit per way and
- // set the bit for the way when that way is accessed. The way to evict
- // is the first way without its MRU bit set. If all MRU bits become
- // set, they are all reset and we start over. Surprisingly, this
- // simple method performs as good as, if not better than, true LRU
- // or tree-based hot sector LRU.
- //
- template <typename T, int ways>
- struct FullyAssociativeTags {
- bitvec<ways> evictmap;
- T tags[ways];
- static const T INVALID = InvalidTag<T>::INVALID;
- FullyAssociativeTags() {
- reset();
- }
- void reset() {
- evictmap = 0;
- foreach (i, ways) {
- tags[i] = INVALID;
- }
- }
- void use(int way) {
- evictmap[way] = 1;
- // Performance is somewhat better with this off with higher associativity caches:
- // if (evictmap.allset()) evictmap = 0;
- }
- //
- // This is a clever way of doing branch-free matching
- // with conditional moves and addition. It relies on
- // having at most one matching entry in the array;
- // otherwise the algorithm breaks:
- //
- int match(T target) {
- int way = 0;
- foreach (i, ways) {
- way += (tags[i] == target) ? (i + 1) : 0;
- }
- return way - 1;
- }
- int probe(T target) {
- int way = match(target);
- if (way < 0) return -1;
- use(way);
- return way;
- }
- int lru() const {
- return (evictmap.allset()) ? 0 : (~evictmap).lsb();
- }
- int select(T target, T& oldtag) {
- int way = probe(target);
- if (way < 0) {
- way = lru();
- if (evictmap.allset()) evictmap = 0;
- oldtag = tags[way];
- tags[way] = target;
- }
- use(way);
- if (evictmap.allset()) {
- evictmap = 0;
- use(way);
- }
- return way;
- }
- int select(T target) {
- T dummy;
- return select(target, dummy);
- }
- void invalidate_way(int way) {
- tags[way] = INVALID;
- evictmap[way] = 0;
- }
- int invalidate(T target) {
- int way = probe(target);
- if (way < 0) return -1;
- invalidate_way(way);
- return way;
- }
- const T& operator [](int index) const { return tags[index]; }
- T& operator [](int index) { return tags[index]; }
- int operator ()(T target) { return probe(target); }
- stringbuf& printway(stringbuf& os, int i) const {
- os << " way ", intstring(i, -2), ": ";
- if (tags[i] != INVALID) {
- os << "tag 0x", hexstring(tags[i], sizeof(T)*8);
- if (evictmap[i]) os << " (MRU)";
- } else {
- os << "<invalid>";
- }
- return os;
- }
- stringbuf& print(stringbuf& os) const {
- foreach (i, ways) {
- printway(os, i);
- os << endl;
- }
- return os;
- }
- ostream& print(ostream& os) const {
- stringbuf sb;
- print(sb);
- os << sb;
- return os;
- }
- };
- template <typename T, int ways>
- ostream& operator <<(ostream& os, const FullyAssociativeTags<T, ways>& tags) {
- return tags.print(os);
- }
- template <typename T, int ways>
- stringbuf& operator <<(stringbuf& sb, const FullyAssociativeTags<T, ways>& tags) {
- return tags.print(sb);
- }
- //
- // Associative array implemented using vectorized
- // comparisons spread across multiple byte slices
- // and executed in parallel.
- //
- // This implementation is roughly 2-4x as fast
- // as the naive scalar code on SSE2 machines,
- // especially for larger arrays.
- //
- // Very small arrays (less than 8 entries) should
- // use the normal scalar FullyAssociativeTags
- // for best performance. Both classes use the
- // same principle for very fast one-hot matching.
- //
- // Limitations:
- //
- // - Every tag in the array must be unique,
- // except for the invalid tag (all 1s)
- //
- // - <size> can be from 1 to 128. Technically
- // up to 254, however element 255 cannot
- // be used. Matching is done in groups
- // of 16 elements in parallel.
- //
- // - <width> in bits can be from 1 to 64
- //
- template <int size, int width, int padsize = 0>
- struct FullyAssociativeTagsNbitOneHot {
- typedef vec16b vec_t;
- typedef W64 base_t;
- static const int slices = (width + 7) / 8;
- static const int chunkcount = (size+15) / 16;
- static const int padchunkcount = (padsize+15) / 16;
- vec16b tags[slices][chunkcount + padchunkcount] alignto(16);
- base_t tagsmirror[size]; // for fast scalar access
- bitvec<size> valid;
- bitvec<size> evictmap;
- FullyAssociativeTagsNbitOneHot() {
- reset();
- }
- void reset() {
- valid = 0;
- evictmap = 0;
- memset(tags, 0xff, sizeof(tags));
- memset(tagsmirror, 0xff, sizeof(tagsmirror));
- }
- int match(const vec16b* targetslices) const {
- vec16b sum = x86_sse_zerob();
- foreach (i, chunkcount) {
- vec16b eq = *((vec16b*)&index_bytes_plus1_vec16b[i]);
- foreach (j, slices) {
- eq = x86_sse_pandb(x86_sse_pcmpeqb(tags[j][i], targetslices[j]), eq);
- }
- sum = x86_sse_psadbw(sum, eq);
- }
- int idx = (x86_sse_pextrw<0>(sum) + x86_sse_pextrw<4>(sum));
- return idx-1;
- }
- static void prep(vec16b* targetslices, base_t tag) {
- foreach (i, slices) {
- targetslices[i] = x86_sse_dupb((byte)tag);
- tag >>= 8;
- }
- }
- int match(base_t tag) const {
- vec16b targetslices[16];
- prep(targetslices, tag);
- return match(targetslices);
- }
- int search(base_t tag) const {
- return match(tag);
- }
- int operator()(base_t tag) const {
- return search(tag);
- }
- void update(int index, base_t tag) {
- // Spread it across all the words
- base_t t = tag;
- foreach (i, slices) {
- *(((byte*)(&tags[i])) + index) = (byte)t;
- t >>= 8;
- }
- tagsmirror[index] = tag;
- valid[index] = 1;
- evictmap[index] = 1;
- }
- class ref {
- friend class FullyAssociativeTagsNbitOneHot;
- FullyAssociativeTagsNbitOneHot<size, width, padsize>& tags;
- int index;
- ref();
- public:
- inline ref(FullyAssociativeTagsNbitOneHot& tags_, int index_): tags(tags_), index(index_) { }
- inline ~ref() { }
- inline ref& operator =(base_t tag) {
- tags.update(index, tag);
- return *this;
- }
- inline ref& operator =(const ref& other) {
- tags.update(index, other.tagsmirror[other.index]);
- return *this;
- }
- };
- friend class ref;
- ref operator [](int index) { return ref(*this, index); }
- base_t operator [](int index) const { return tagsmirror[index]; }
- bool isvalid(int index) {
- return valid[index];
- }
- int insertslot(int idx, base_t tag) {
- valid[idx] = 1;
- (*this)[idx] = tag;
- return idx;
- }
- int insert(base_t tag) {
- if (valid.allset()) return -1;
- int idx = (~valid).lsb();
- return insertslot(idx, tag);
- }
- void invalidateslot(int index) {
- valid[index] = 0;
- (*this)[index] = 0xffffffffffffffffULL; // invalid marker
- }
- void validateslot(int index) {
- valid[index] = 1;
- }
- int invalidate(base_t target) {
- int index = match(target);
- if (index < 0) return 0;
- invalidateslot(index);
- return 1;
- }
- bitvec<size> masked_match(base_t targettag, base_t tagmask) {
- bitvec<size> m;
- foreach (i, size) {
- base_t tag = tagsmirror[i];
- m[i] = ((tag & tagmask) == targettag);
- }
- return m;
- }
- void masked_invalidate(const bitvec<size>& slotmask) {
- foreach (i, size) {
- if unlikely (slotmask[i]) invalidateslot(i);
- }
- }
- void use(int way) {
- evictmap[way] = 1;
- if (evictmap.allset()) {
- evictmap = 0;
- evictmap[way] = 1;
- }
- }
- int probe(base_t target) {
- int way = match(target);
- if (way < 0) return way;
- use(way);
- return way;
- }
- int lru() const {
- return (evictmap.allset()) ? 0 : (~evictmap).lsb();
- }
- int select(base_t target, base_t& oldtag) {
- int way = probe(target);
- if (way < 0) {
- way = lru();
- if (evictmap.allset()) evictmap = 0;
- oldtag = tagsmirror[way];
- update(way, target);
- }
- use(way);
- return way;
- }
- int select(base_t target) {
- base_t dummy;
- return select(target, dummy);
- }
- ostream& printid(ostream& os, int slot) const {
- base_t tag = (*this)[slot];
- os << intstring(slot, 3), ": ";
- os << hexstring(tag, 64);
- os << " ";
- foreach (i, slices) {
- const byte b = *(((byte*)(&tags[i])) + slot);
- os << " ", hexstring(b, 8);
- }
- if (!valid[slot]) os << " <invalid>";
- return os;
- }
- ostream& print(ostream& os) const {
- foreach (i, size) {
- printid(os, i);
- os << endl;
- }
- return os;
- }
- };
- template <int size, int width, int padsize>
- ostream& operator <<(ostream& os, const FullyAssociativeTagsNbitOneHot<size, width, padsize>& tags) {
- return tags.print(os);
- }
- template <typename T, typename V>
- struct NullAssociativeArrayStatisticsCollector {
- static void inserted(V& elem, T newtag, int way) { }
- static void replaced(V& elem, T oldtag, T newtag, int way) { }
- static void probed(V& elem, T tag, int way, bool hit) { }
- static void overflow(T tag) { }
- static void locked(V& slot, T tag, int way) { }
- static void unlocked(V& slot, T tag, int way) { }
- static void invalidated(V& elem, T oldtag, int way) { }
- };
- template <typename T, typename V, int ways, typename stats = NullAssociativeArrayStatisticsCollector<T, V> >
- struct FullyAssociativeArray {
- FullyAssociativeTags<T, ways> tags;
- V data[ways];
- FullyAssociativeArray() {
- reset();
- }
- void reset() {
- tags.reset();
- foreach (i, ways) { data[i].reset(); }
- }
- V* probe(T tag) {
- int way = tags.probe(tag);
- stats::probed((way < 0) ? data[0] : data[way], tag, way, (way >= 0));
- return (way < 0) ? NULL : &data[way];
- }
- V* match(T tag) {
- int way = tags.match(tag);
- return (way < 0) ? NULL : &data[way];
- }
- V* select(T tag, T& oldtag) {
- int way = tags.select(tag, oldtag);
- V& slot = data[way];
- if ((way >= 0) & (tag == oldtag)) {
- stats::probed(slot, tag, way, 1);
- } else {
- if (oldtag == tags.INVALID)
- stats::inserted(slot, tag, way);
- else stats::replaced(slot, oldtag, tag, way);
- }
- return &slot;
- }
- V* select(T tag) {
- T dummy;
- return select(tag, dummy);
- }
- int wayof(const V* line) const {
- int way = (line - (const V*)&data);
- #if 0
- assert(inrange(way, 0, ways-1));
- #endif
- return way;
- }
- T tagof(V* line) {
- int way = wayof(line);
- return tags.tags[way];
- }
- void invalidate_way(int way) {
- stats::invalidated(data[way], tags[way], way);
- tags.invalidate_way(way);
- data[way].reset();
- }
- void invalidate_line(V* line) {
- invalidate_way(wayof(line));
- }
- int invalidate(T tag) {
- int way = tags.probe(tag);
- if (way < 0) return -1;
- invalidate_way(way);
- return way;
- }
- V& operator [](int way) { return data[way]; }
- V* operator ()(T tag) { return select(tag); }
- ostream& print(ostream& os) const {
- foreach (i, ways) {
- stringbuf sb;
- tags.printway(sb, i);
- os << padstring(sb, -40), " -> ";
- data[i].print(os, tags.tags[i]);
- os << endl;
- }
- return os;
- }
- };
- template <typename T, typename V, int ways>
- ostream& operator <<(ostream& os, const FullyAssociativeArray<T, V, ways>& assoc) {
- return assoc.print(os);
- }
- template <typename T, typename V, int setcount, int waycount, int linesize, typename stats = NullAssociativeArrayStatisticsCollector<T, V> >
- struct AssociativeArray {
- typedef FullyAssociativeArray<T, V, waycount, stats> Set;
- Set sets[setcount];
- AssociativeArray() {
- reset();
- }
- void reset() {
- foreach (set, setcount) {
- sets[set].reset();
- }
- }
- static int setof(T addr) {
- return bits(addr, log2(linesize), log2(setcount));
- }
- static T tagof(T addr) {
- return floor(addr, linesize);
- }
- V* probe(T addr) {
- return sets[setof(addr)].probe(tagof(addr));
- }
- V* match(T addr) {
- return sets[setof(addr)].match(tagof(addr));
- }
- V* select(T addr, T& oldaddr) {
- return sets[setof(addr)].select(tagof(addr), oldaddr);
- }
- V* select(T addr) {
- T dummy;
- return sets[setof(addr)].select(tagof(addr), dummy);
- }
- int invalidate(T addr) {
- return sets[setof(addr)].invalidate(tagof(addr));
- }
- ostream& print(ostream& os) const {
- os << "AssociativeArray<", setcount, " sets, ", waycount, " ways, ", linesize, "-byte lines>:", endl;
- foreach (set, setcount) {
- os << " Set ", set, ":", endl;
- os << sets[set];
- }
- return os;
- }
- };
- template <typename T, typename V, int size, int ways, int linesize>
- ostream& operator <<(ostream& os, const AssociativeArray<T, V, size, ways, linesize>& aa) {
- return aa.print(os);
- }
- //
- // Lockable version of associative arrays:
- //
- template <typename T, int ways>
- struct LockableFullyAssociativeTags {
- bitvec<ways> evictmap;
- bitvec<ways> unlockedmap;
- T tags[ways];
- static const T INVALID = InvalidTag<T>::INVALID;
- LockableFullyAssociativeTags() {
- reset();
- }
- void reset() {
- evictmap = 0;
- unlockedmap.setall();
- foreach (i, ways) {
- tags[i] = INVALID;
- }
- }
- void use(int way) {
- evictmap[way] = 1;
- // Performance is somewhat better with this off with higher associativity caches:
- // if (evictmap.allset()) evictmap = 0;
- }
- //
- // This is a clever way of doing branch-free matching
- // with conditional moves and addition. It relies on
- // having at most one matching entry in the array;
- // otherwise the algorithm breaks:
- //
- int match(T target) {
- int way = 0;
- foreach (i, ways) {
- way += (tags[i] == target) ? (i + 1) : 0;
- }
- return way - 1;
- }
- int probe(T target) {
- int way = match(target);
- if (way < 0) return -1;
- use(way);
- return way;
- }
- int lru() const {
- if (!unlockedmap) return -1;
- bitvec<ways> w = (~evictmap) & unlockedmap;
- return (*w) ? w.lsb() : 0;
- }
- int select(T target, T& oldtag) {
- int way = probe(target);
- if (way < 0) {
- way = lru();
- if (way < 0) return -1;
- if (evictmap.allset()) evictmap = 0;
- oldtag = tags[way];
- tags[way] = target;
- }
- use(way);
- return way;
- }
- int select(T target) {
- T dummy;
- return select(target, dummy);
- }
- int select_and_lock(T tag, bool& firstlock, T& oldtag) {
- int way = select(tag, oldtag);
- if (way < 0) return way;
- firstlock = unlockedmap[way];
- lock(way);
- return way;
- }
- int select_and_lock(T tag, bool& firstlock) {
- T dummy;
- return select_and_lock(tag, firstlock, dummy);
- }
- int select_and_lock(T target) { bool dummy; return select_and_lock(target, dummy); }
- void invalidate_way(int way) {
- tags[way] = INVALID;
- evictmap[way] = 0;
- unlockedmap[way] = 1;
- }
- int invalidate(T target) {
- int way = probe(target);
- if (way < 0) return -1;
- invalidate_way(way);
- }
- bool islocked(int way) const { return !unlockedmap[way]; }
- void lock(int way) { unlockedmap[way] = 0; }
- void unlock(int way) { unlockedmap[way] = 1; }
- const T& operator [](int index) const { return tags[index]; }
- T& operator [](int index) { return tags[index]; }
- int operator ()(T target) { return probe(target); }
- stringbuf& printway(stringbuf& os, int i) const {
- os << " way ", intstring(i, -2), ": ";
- if (tags[i] != INVALID) {
- os << "tag 0x", hexstring(tags[i], sizeof(T)*8);
- if (evictmap[i]) os << " (MRU)";
- if (!unlockedmap[i]) os << " (locked)";
- } else {
- os << "<invalid>";
- }
- return os;
- }
- stringbuf& print(stringbuf& os) const {
- foreach (i, ways) {
- printway(os, i);
- os << endl;
- }
- return os;
- }
- ostream& print(ostream& os) const {
- stringbuf sb;
- print(sb);
- os << sb;
- return os;
- }
- };
- template <typename T, int ways>
- ostream& operator <<(ostream& os, const LockableFullyAssociativeTags<T, ways>& tags) {
- return tags.print(os);
- }
- template <typename T, int ways>
- stringbuf& operator <<(stringbuf& sb, const LockableFullyAssociativeTags<T, ways>& tags) {
- return tags.print(sb);
- }
- template <typename T, typename V, int ways, typename stats = NullAssociativeArrayStatisticsCollector<T, V> >
- struct LockableFullyAssociativeArray {
- LockableFullyAssociativeTags<T, ways> tags;
- V data[ways];
- LockableFullyAssociativeArray() {
- reset();
- }
- void reset() {
- tags.reset();
- foreach (i, ways) { data[i].reset(); }
- }
- V* probe(T tag) {
- int way = tags.probe(tag);
- stats::probed((way < 0) ? data[0] : data[way], tag, way, (way >= 0));
- return (way < 0) ? NULL : &data[way];
- }
- V* select(T tag, T& oldtag) {
- int way = tags.select(tag, oldtag);
- if (way < 0) {
- stats::overflow(tag);
- return NULL;
- }
- V& slot = data[way];
- if ((way >= 0) & (tag == oldtag)) {
- stats::probed(slot, tag, way, 1);
- } else {
- if (oldtag == tags.INVALID)
- stats::inserted(slot, tag, way);
- else stats::replaced(slot, oldtag, tag, way);
- }
- return &slot;
- }
- V* select(T tag) {
- T dummy;
- return select(tag, dummy);
- }
- V* select_and_lock(T tag, bool& firstlock, T& oldtag) {
- int way = tags.select_and_lock(tag, firstlock, oldtag);
- if (way < 0) {
- stats::overflow(tag);
- return NULL;
- }
- V& slot = data[way];
- if (tag == oldtag) {
- stats::probed(slot, tag, way, 1);
- } else {
- if (oldtag == tags.INVALID)
- stats::inserted(slot, tag, way);
- else stats::replaced(slot, oldtag, tag, way);
- stats::locked(slot, tag, way);
- }
- return &slot;
- }
- V* select_and_lock(T tag, bool& firstlock) {
- T dummy;
- return select_and_lock(tag, firstlock, dummy);
- }
- V* select_and_lock(T tag) { bool dummy; return select_and_lock(tag, dummy); }
- int wayof(const V* line) const {
- int way = (line - (const V*)&data);
- #if 0
- assert(inrange(way, 0, ways-1));
- #endif
- return way;
- }
- T tagof(V* line) {
- int way = wayof(line);
- return tags.tags[way];
- }
- void invalidate_way(int way) {
- unlock_way(way);
- stats::invalidated(data[way], tags[way], way);
- tags.invalidate_way(way);
- data[way].reset();
- }
- void invalidate_line(V* line) {
- invalidate_way(wayof(line));
- }
- int invalidate(T tag) {
- int way = tags.probe(tag);
- if (way < 0) return -1;
- invalidate_way(way);
- return way;
- }
- void unlock_way(int way) {
- stats::unlocked(data[way], tags[way], way);
- tags.unlock(way);
- }
- void unlock_line(V* line) {
- unlock_way(wayof(line));
- }
- int unlock(T tag) {
- int way = tags.probe(tag);
- if (way < 0) return;
- unlock_way(way);
- if (tags.islocked(way)) stats::unlocked(data[way], tags[way], way);
- return way;
- }
- V& operator [](int way) { return data[way]; }
- V* operator ()(T tag) { return select(tag); }
- ostream& print(ostream& os) const {
- foreach (i, ways) {
- stringbuf sb;
- tags.printway(sb, i);
- os << padstring(sb, -40), " -> ";
- data[i].print(os, tags.tags[i]);
- os << endl;
- }
- return os;
- }
- };
- template <typename T, typename V, int ways>
- ostream& operator <<(ostream& os, const LockableFullyAssociativeArray<T, V, ways>& assoc) {
- return assoc.print(os);
- }
- template <typename T, typename V, int setcount, int waycount, int linesize, typename stats = NullAssociativeArrayStatisticsCollector<T, V> >
- struct LockableAssociativeArray {
- typedef LockableFullyAssociativeArray<T, V, waycount, stats> Set;
- Set sets[setcount];
- LockableAssociativeArray() {
- reset();
- }
- void reset() {
- foreach (set, setcount) {
- sets[set].reset();
- }
- }
- static int setof(T addr) {
- return bits(addr, log2(linesize), log2(setcount));
- }
- static T tagof(T addr) {
- return floor(addr, linesize);
- }
- V* probe(T addr) {
- return sets[setof(addr)].probe(tagof(addr));
- }
- V* select(T addr, T& oldaddr) {
- return sets[setof(addr)].select(tagof(addr), oldaddr);
- }
- V* select(T addr) {
- T dummy;
- return select(addr, dummy);
- }
- void invalidate(T addr) {
- sets[setof(addr)].invalidate(tagof(addr));
- }
- V* select_and_lock(T addr, bool& firstlock, T& oldtag) {
- V* line = sets[setof(addr)].select_and_lock(tagof(addr), firstlock, oldtag);
- return line;
- }
- V* select_and_lock(T addr, bool& firstlock) {
- W64 dummy;
- return select_and_lock(addr, firstlock, dummy);
- }
- V* select_and_lock(T addr) { bool dummy; return select_and_lock(addr, dummy); }
- ostream& print(ostream& os) const {
- os << "LockableAssociativeArray<", setcount, " sets, ", waycount, " ways, ", linesize, "-byte lines>:", endl;
- foreach (set, setcount) {
- os << " Set ", set, ":", endl;
- os << sets[set];
- }
- return os;
- }
- };
- template <typename T, typename V, int size, int ways, int linesize>
- ostream& operator <<(ostream& os, const LockableAssociativeArray<T, V, size, ways, linesize>& aa) {
- return aa.print(os);
- }
- template <typename T, int setcount, int linesize>
- struct DefaultCacheIndexingFunction {
- static inline Waddr setof(T address) { return bits(address, log2(linesize), log2(setcount)); }
- };
- template <typename T, int setcount, int linesize>
- struct XORCacheIndexingFunction {
- static inline Waddr setof(T address) {
- address >>= log2(linesize);
- const int tagbits = (sizeof(Waddr) * 8) - log2(linesize);
- address = lowbits(address, tagbits);
- return foldbits<log2(setcount)>(address);
- }
- };
- template <typename T, int setcount, int linesize>
- struct CRCCacheIndexingFunction {
- static inline Waddr setof(T address) {
- Waddr slot = 0;
- address >>= log2(linesize);
- CRC32 crc;
- crc << address;
- W32 v = crc;
- return foldbits<log2(setcount)>(v);
- }
- };
- template <typename T, typename V, int setcount, int waycount, int linesize, typename indexfunc = DefaultCacheIndexingFunction<T, setcount, linesize>, typename stats = NullAssociativeArrayStatisticsCollector<T, V> >
- struct LockableCommitRollbackAssociativeArray {
- typedef LockableFullyAssociativeArray<T, V, waycount, stats> Set;
- Set sets[setcount];
- struct ClearList {
- W16 set;
- W16 way;
- };
- //
- // Technically (setcount * waycount) will cover everything,
- // but this is not true if we allow lines to be explicitly
- // invalidated between commits. In this case, a potentially
- // unlimited buffer would be required as noted below.
- //
- // Therefore, we choose a small size, above which it becomes
- // more efficient to just invalidate everything without
- // traversing a clear list.
- //
- ClearList clearlist[64];
- int cleartail;
- bool clearlist_exceeded;
- LockableCommitRollbackAssociativeArray() {
- reset();
- }
- void reset() {
- foreach (set, setcount) {
- sets[set].reset();
- }
- cleartail = 0;
- clearlist_exceeded = 0;
- }
- static int setof(T addr) {
- return indexfunc::setof(addr);
- }
- static T tagof(T addr) {
- return floor(addr, linesize);
- }
- V* probe(T addr) {
- return sets[setof(addr)].probe(tagof(addr));
- }
- V* select(T addr, T& oldaddr) {
- return sets[setof(addr)].select(tagof(addr), oldaddr);
- }
- V* select(T addr) {
- T dummy;
- return select(addr, dummy);
- }
- void invalidate(T addr) {
- sets[setof(addr)].invalidate(tagof(addr));
- }
- V* select_and_lock(T addr, bool& firstlock, T& oldtag) {
- V* line = sets[setof(addr)].select_and_lock(tagof(addr), firstlock, oldtag);
- if unlikely (!line) return NULL;
- if likely (firstlock) {
- int set = setof(addr);
- int way = sets[set].wayof(line);
- if unlikely (cleartail >= lengthof(clearlist)) {
- //
- // Too many lines are locked to keep track of: this can
- // happen if some lines are intentionally invalidated
- // before the final commit or rollback; these invalidates
- // do not remove the corresponding slot from the clearlist,
- // so the list may still overflow. In this case, just bulk
- // process every set and every way.
- //
- clearlist_exceeded = 1;
- } else {
- ClearList& c = clearlist[cleartail++];
- c.set = set;
- c.way = way;
- }
- }
- return line;
- }
- V* select_and_lock(T addr, bool& firstlock) {
- W64 dummy;
- return select_and_lock(addr, firstlock, dummy);
- }
- V* select_and_lock(T addr) { bool dummy; return select_and_lock(addr, dummy); }
- void unlock_all_and_invalidate() {
- if unlikely (clearlist_exceeded) {
- foreach (setid, setcount) {
- Set& set = sets[setid];
- foreach (wayid, waycount) set.invalidate_way(wayid);
- }
- } else {
- foreach (i, cleartail) {
- ClearList& c = clearlist[i];
- #if 0
- assert(c.set < setcount);
- assert(c.way < waycount);
- #endif
- Set& set = sets[c.set];
- V& line = set[c.way];
- set.invalidate_line(&line);
- }
- }
- cleartail = 0;
- clearlist_exceeded = 0;
- #if 0
- foreach (s, setcount) {
- Set& set = sets[s];
- foreach (way, waycount) {
- V& line = set[way];
- T tag = set.tagof(&line);
- if ((tag != set.tags.INVALID)) {
- assert(false);
- }
- }
- }
- #endif
- }
- void unlock_all() {
- if unlikely (clearlist_exceeded) {
- foreach (setid, setcount) {
- Set& set = sets[setid];
- foreach (wayid, waycount) set.unlock_way(wayid);
- }
- } else {
- foreach (i, cleartail) {
- ClearList& c = clearlist[i];
- #if 0
- assert(c.set < setcount);
- assert(c.way < waycount);
- #endif
- Set& set = sets[c.set];
- V& line = set[c.way];
- set.unlock_line(&line);
- }
- }
- cleartail = 0;
- clearlist_exceeded = 0;
- }
- ostream& print(ostream& os) const {
- os << "LockableAssociativeArray<", setcount, " sets, ", waycount, " ways, ", linesize, "-byte lines>:", endl;
- foreach (set, setcount) {
- os << " Set ", set, ":", endl;
- os << sets[set];
- }
- return os;
- }
- };
- template <typename T, typename V, int size, int ways, int linesize>
- ostream& operator <<(ostream& os, const LockableCommitRollbackAssociativeArray<T, V, size, ways, linesize>& aa) {
- return aa.print(os);
- }
- //
- // Lockable cache arrays supporting commit/rollback
- //
- // This structure implements the dirty-and-locked scheme to prevent speculative
- // data from propagating to lower levels of the cache hierarchy until it can be
- // committed.
- //
- // Any stores into the cache (signalled by select_and_lock()) back up the old
- // cache line and add this to an array for later rollback purposes.
- //
- // At commit(), all locked lines are unlocked and the backed up cache lines are
- // simply discarded, leaving them free to be replaced or written back.
- //
- // At rollback() all locked lines are invalidated in both this cache and any
- // higher levels (via the invalidate_upwards() callback), thereby forcing
- // clean copies to be refetched as needed after the rollback.
- //
- template <typename T, typename V, int setcount, int waycount, int linesize, int maxdirty, typename stats = NullAssociativeArrayStatisticsCollector<T, V> >
- struct CommitRollbackCache: public LockableCommitRollbackAssociativeArray<T, V, setcount, waycount, linesize, stats> {
- typedef LockableCommitRollbackAssociativeArray<T, V, setcount, waycount, linesize, stats> array_t;
- struct BackupCacheLine {
- W64* addr;
- W64 data[linesize / sizeof(W64)];
- };
- BackupCacheLine stores[maxdirty];
- BackupCacheLine* storetail;
- CommitRollbackCache() {
- reset();
- }
- void reset() {
- array_t::reset();
- storetail = stores;
- }
- //
- // Invalidate lines in higher level caches if needed
- //
- void invalidate_upwards(T addr);
- void invalidate(T addr) {
- array_t::invalidate(addr);
- invalidate_upwards(addr);
- }
- V* select_and_lock(T addr, T& oldaddr) {
- addr = floor(addr, linesize);
- bool firstlock;
- V* line = array_t::select_and_lock(addr, firstlock, oldaddr);
- if (!line) return NULL;
- if (firstlock) {
- W64* linedata = (W64*)addr;
- storetail->addr = linedata;
- foreach (i, lengthof(storetail->data)) storetail->data[i] = linedata[i];
- storetail++;
- }
- return line;
- }
- V* select_and_lock(T addr) {
- T dummy;
- return select_and_lock(addr, dummy);
- }
- void commit() {
- array_t::unlock_all();
- storetail = stores;
- }
- void rollback() {
- array_t::unlock_all_and_invalidate();
- BackupCacheLine* cl = stores;
- while (cl < storetail) {
- W64* linedata = cl->addr;
- foreach (i, lengthof(storetail->data)) linedata[i] = cl->data[i];
- invalidate_upwards((W64)cl->addr);
- cl++;
- }
- storetail = stores;
- }
- void complete() { }
- };
- template <int size, int padsize = 0>
- struct FullyAssociativeTags8bit {
- typedef vec16b vec_t;
- typedef byte base_t;
- static const int chunkcount = (size+15) / 16;
- static const int padchunkcount = (padsize+15) / 16;
- vec_t tags[chunkcount + padchunkcount] alignto(16);
- bitvec<size> valid;
- W64 getvalid() { return valid.integer(); }
- FullyAssociativeTags8bit() {
- reset();
- }
- base_t operator [](int i) const {
- return ((base_t*)&tags)[i];
- }
- base_t& operator [](int i) {
- return ((base_t*)&tags)[i];
- }
- bool isvalid(int index) {
- return valid[index];
- }
- void reset() {
- valid = 0;
- W64* p = (W64*)&tags;
- foreach (i, ((chunkcount + padchunkcount)*16)/8) p[i] = 0xffffffffffffffffLL;
- }
- static const vec_t prep(base_t tag) {
- return x86_sse_dupb(tag);
- }
- int insertslot(int idx, base_t tag) {
- valid[idx] = 1;
- (*this)[idx] = tag;
- return idx;
- }
- int insert(base_t tag) {
- if (valid.allset()) return -1;
- int idx = (~valid).lsb();
- return insertslot(idx, tag);
- }
- bitvec<size> match(const vec_t target) const {
- bitvec<size> m = 0;
- foreach (i, chunkcount) {
- m = m.accum(i*16, 16, x86_sse_pmovmskb(x86_sse_pcmpeqb(target, tags[i])));
- }
- return m & valid;
- }
- bitvec<size> match(base_t target) const {
- return match(prep(target));
- }
- bitvec<size> matchany(const vec_t target) const {
- bitvec<size> m = 0;
- vec_t zero = prep(0);
- foreach (i, chunkcount) {
- m = m.accum(i*16, 16, x86_sse_pmovmskb(x86_sse_pcmpeqb(x86_sse_pandb(tags[i], target), zero)));
- }
- return (~m) & valid;
- }
- bitvec<size> matchany(base_t target) const {
- return matchany(prep(target));
- }
- int search(const vec_t target) const {
- bitvec<size> bitmap = match(target);
- int idx = bitmap.lsb();
- if (!bitmap) idx = -1;
- return idx;
- }
- int extract(const vec_t target) {
- int idx = search(target);
- if (idx >= 0) valid[idx] = 0;
- return idx;
- }
- int search(base_t tag) const {
- return search(prep(tag));
- }
- bitvec<size> extract(base_t tag) {
- return extract(prep(tag));
- }
- void invalidateslot(int index) {
- valid[index] = 0;
- }
- const bitvec<size>& invalidatemask(const bitvec<size>& mask) {
- valid &= ~mask;
- return mask;
- }
- bitvec<size> invalidate(const vec_t target) {
- return invalidatemask(match(target));
- }
- bitvec<size> invalidate(base_t target) {
- return invalidate(prep(target));
- }
- void collapse(int index) {
- base_t* tagbase = (base_t*)&tags;
- base_t* base = tagbase + index;
- vec_t* dp = (vec_t*)base;
- vec_t* sp = (vec_t*)(base + sizeof(base_t));
- foreach (i, chunkcount) {
- x86_sse_stvbu(dp++, x86_sse_ldvbu(sp++));
- }
- valid = valid.remove(index);
- }
- void decrement(base_t amount = 1) {
- foreach (i, chunkcount) { tags[i] = x86_sse_psubusb(tags[i], prep(amount)); }
- }
- void increment(base_t amount = 1) {
- foreach (i, chunkcount) { tags[i] = x86_sse_paddusb(tags[i], prep(amount)); }
- }
- ostream& printid(ostream& os, int slot) const {
- int tag = (*this)[slot];
- if (valid[slot])
- os << intstring(tag, 3);
- else os << "???";
- return os;
- }
- ostream& print(ostream& os) const {
- foreach (i, size) {
- printid(os, i);
- os << " ";
- }
- return os;
- }
- };
- template <int size, int padsize>
- ostream& operator <<(ostream& os, const FullyAssociativeTags8bit<size, padsize>& tags) {
- return tags.print(os);
- }
- template <int size, int padsize = 0>
- struct FullyAssociativeTags16bit {
- typedef vec8w vec_t;
- typedef W16 base_t;
- static const int chunkcount = ((size*2)+15) / 16;
- static const int padchunkcount = ((padsize*2)+15) / 16;
- vec_t tags[chunkcount + padchunkcount] alignto(16);
- bitvec<size> valid;
- W64 getvalid() { return valid.integer(); }
- FullyAssociativeTags16bit() {
- reset();
- }
- base_t operator [](int i) const {
- return ((base_t*)&tags)[i];
- }
- base_t& operator [](int i) {
- return ((base_t*)&tags)[i];
- }
- bool isvalid(int index) {
- return valid[index];
- }
- void reset() {
- valid = 0;
- W64* p = (W64*)&tags;
- foreach (i, ((chunkcount + padchunkcount)*16)/8) p[i] = 0xffffffffffffffffLL;
- }
- static const vec_t prep(base_t tag) {
- return x86_sse_dupw(tag);
- }
- int insertslot(int idx, base_t tag) {
- valid[idx] = 1;
- (*this)[idx] = tag;
- return idx;
- }
- int insert(base_t tag) {
- if (valid.allset()) return -1;
- int idx = (~valid).lsb();
- return insertslot(idx, tag);
- }
- bitvec<size> match(const vec_t target) const {
- bitvec<size> m = 0;
- foreach (i, chunkcount) {
- m = m.accum(i*8, 8, x86_sse_pmovmskw(x86_sse_pcmpeqw(target, tags[i])));
- }
- return m & valid;
- }
- bitvec<size> match(base_t target) const {
- return match(prep(target));
- }
- bitvec<size> matchany(const vec_t target) const {
- bitvec<size> m = 0;
- vec_t zero = prep(0);
- foreach (i, chunkcount) {
- m = m.accum(i*8, 8, x86_sse_pmovmskw(x86_sse_pcmpeqw(x86_sse_pandw(tags[i], target), zero)));
- }
- return (~m) & valid;
- }
- bitvec<size> matchany(base_t target) const {
- return matchany(prep(target));
- }
- int search(const vec_t target) const {
- bitvec<size> bitmap = match(target);
- int idx = bitmap.lsb();
- if (!bitmap) idx = -1;
- return idx;
- }
- int extract(const vec_t target) {
- int idx = search(target);
- if (idx >= 0) valid[idx] = 0;
- return idx;
- }
- int search(base_t tag) const {
- return search(prep(tag));
- }
- bitvec<size> extract(base_t tag) {
- return extract(prep(tag));
- }
- void invalidateslot(int index) {
- valid[index] = 0;
- }
- const bitvec<size>& invalidatemask(const bitvec<size>& mask) {
- valid &= ~mask;
- return mask;
- }
- bitvec<size> invalidate(const vec_t target) {
- return invalidatemask(match(target));
- }
- bitvec<size> invalidate(base_t target) {
- return invalidate(prep(target));
- }
- void collapse(int index) {
- base_t* tagbase = (base_t*)&tags;
- base_t* base = tagbase + index;
- vec_t* dp = (vec_t*)base;
- vec_t* sp = (vec_t*)(base + 1);
- foreach (i, chunkcount) {
- x86_sse_stvwu(dp++, x86_sse_ldvwu(sp++));
- }
- valid = valid.remove(index);
- }
- void decrement(base_t amount = 1) {
- foreach (i, chunkcount) { tags[i] = x86_sse_psubusw(tags[i], prep(amount)); }
- }
- void increment(base_t amount = 1) {
- foreach (i, chunkcount) { tags[i] = x86_sse_paddusw(tags[i], prep(amount)); }
- }
- ostream& printid(ostream& os, int slot) const {
- int tag = (*this)[slot];
- if (valid[slot])
- os << intstring(tag, 3);
- else os << "???";
- return os;
- }
- ostream& print(ostream& os) const {
- foreach (i, size) {
- printid(os, i);
- os << " ";
- }
- return os;
- }
- };
- template <int size, int padsize>
- ostream& operator <<(ostream& os, const FullyAssociativeTags16bit<size, padsize>& tags) {
- return tags.print(os);
- }
- #endif // _LOGIC_H_