PageRenderTime 67ms CodeModel.GetById 8ms RepoModel.GetById 0ms app.codeStats 0ms

/external/valgrind/main/cachegrind/cg_sim.c

https://gitlab.com/brian0218/rk3188_r-box_android4.2.2_sdk
C | 198 lines | 140 code | 15 blank | 43 comment | 32 complexity | 2290e42eaf1338278827335b0e3823c0 MD5 | raw file
  1. /*--------------------------------------------------------------------*/
  2. /*--- Cache simulation cg_sim.c ---*/
  3. /*--------------------------------------------------------------------*/
  4. /*
  5. This file is part of Cachegrind, a Valgrind tool for cache
  6. profiling programs.
  7. Copyright (C) 2002-2011 Nicholas Nethercote
  8. njn@valgrind.org
  9. This program is free software; you can redistribute it and/or
  10. modify it under the terms of the GNU General Public License as
  11. published by the Free Software Foundation; either version 2 of the
  12. License, or (at your option) any later version.
  13. This program is distributed in the hope that it will be useful, but
  14. WITHOUT ANY WARRANTY; without even the implied warranty of
  15. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16. General Public License for more details.
  17. You should have received a copy of the GNU General Public License
  18. along with this program; if not, write to the Free Software
  19. Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
  20. 02111-1307, USA.
  21. The GNU General Public License is contained in the file COPYING.
  22. */
/* Notes:
   - simulates a write-allocate cache
   - (block --> set) hash function uses simple bit selection
   - handling of references straddling two cache blocks:
       - counts as only one cache access (not two)
       - both blocks hit                  --> one hit
       - one block hits, the other misses --> one miss
       - both blocks miss                 --> one miss (not two)
*/
  32. typedef struct {
  33. Int size; /* bytes */
  34. Int assoc;
  35. Int line_size; /* bytes */
  36. Int sets;
  37. Int sets_min_1;
  38. Int line_size_bits;
  39. Int tag_shift;
  40. Char desc_line[128];
  41. UWord* tags;
  42. } cache_t2;
  43. /* By this point, the size/assoc/line_size has been checked. */
  44. static void cachesim_initcache(cache_t config, cache_t2* c)
  45. {
  46. Int i;
  47. c->size = config.size;
  48. c->assoc = config.assoc;
  49. c->line_size = config.line_size;
  50. c->sets = (c->size / c->line_size) / c->assoc;
  51. c->sets_min_1 = c->sets - 1;
  52. c->line_size_bits = VG_(log2)(c->line_size);
  53. c->tag_shift = c->line_size_bits + VG_(log2)(c->sets);
  54. if (c->assoc == 1) {
  55. VG_(sprintf)(c->desc_line, "%d B, %d B, direct-mapped",
  56. c->size, c->line_size);
  57. } else {
  58. VG_(sprintf)(c->desc_line, "%d B, %d B, %d-way associative",
  59. c->size, c->line_size, c->assoc);
  60. }
  61. c->tags = VG_(malloc)("cg.sim.ci.1",
  62. sizeof(UWord) * c->sets * c->assoc);
  63. for (i = 0; i < c->sets * c->assoc; i++)
  64. c->tags[i] = 0;
  65. }
  66. /* This is done as a macro rather than by passing in the cache_t2 as an
  67. * arg because it slows things down by a small amount (3-5%) due to all
  68. * that extra indirection. */
  69. #define CACHESIM(L, MISS_TREATMENT) \
  70. /* The cache and associated bits and pieces. */ \
  71. static cache_t2 L; \
  72. \
  73. static void cachesim_##L##_initcache(cache_t config) \
  74. { \
  75. cachesim_initcache(config, &L); \
  76. } \
  77. \
  78. /* This attribute forces GCC to inline this function, even though it's */ \
  79. /* bigger than its usual limit. Inlining gains around 5--10% speedup. */ \
  80. __attribute__((always_inline)) \
  81. static __inline__ \
  82. void cachesim_##L##_doref(Addr a, UChar size, ULong* m1, ULong *mL) \
  83. { \
  84. UInt set1 = ( a >> L.line_size_bits) & (L.sets_min_1); \
  85. UInt set2 = ((a+size-1) >> L.line_size_bits) & (L.sets_min_1); \
  86. UWord tag = a >> L.tag_shift; \
  87. UWord tag2; \
  88. Int i, j; \
  89. Bool is_miss = False; \
  90. UWord* set; \
  91. \
  92. /* First case: word entirely within line. */ \
  93. if (set1 == set2) { \
  94. \
  95. set = &(L.tags[set1 * L.assoc]); \
  96. \
  97. /* This loop is unrolled for just the first case, which is the most */\
  98. /* common. We can't unroll any further because it would screw up */\
  99. /* if we have a direct-mapped (1-way) cache. */\
  100. if (tag == set[0]) { \
  101. return; \
  102. } \
  103. /* If the tag is one other than the MRU, move it into the MRU spot */\
  104. /* and shuffle the rest down. */\
  105. for (i = 1; i < L.assoc; i++) { \
  106. if (tag == set[i]) { \
  107. for (j = i; j > 0; j--) { \
  108. set[j] = set[j - 1]; \
  109. } \
  110. set[0] = tag; \
  111. return; \
  112. } \
  113. } \
  114. \
  115. /* A miss; install this tag as MRU, shuffle rest down. */ \
  116. for (j = L.assoc - 1; j > 0; j--) { \
  117. set[j] = set[j - 1]; \
  118. } \
  119. set[0] = tag; \
  120. MISS_TREATMENT; \
  121. return; \
  122. \
  123. /* Second case: word straddles two lines. */ \
  124. /* Nb: this is a fast way of doing ((set1+1) % L.sets) */ \
  125. } else if (((set1 + 1) & (L.sets_min_1)) == set2) { \
  126. set = &(L.tags[set1 * L.assoc]); \
  127. if (tag == set[0]) { \
  128. goto block2; \
  129. } \
  130. for (i = 1; i < L.assoc; i++) { \
  131. if (tag == set[i]) { \
  132. for (j = i; j > 0; j--) { \
  133. set[j] = set[j - 1]; \
  134. } \
  135. set[0] = tag; \
  136. goto block2; \
  137. } \
  138. } \
  139. for (j = L.assoc - 1; j > 0; j--) { \
  140. set[j] = set[j - 1]; \
  141. } \
  142. set[0] = tag; \
  143. is_miss = True; \
  144. block2: \
  145. set = &(L.tags[set2 * L.assoc]); \
  146. tag2 = (a+size-1) >> L.tag_shift; \
  147. if (tag2 == set[0]) { \
  148. goto miss_treatment; \
  149. } \
  150. for (i = 1; i < L.assoc; i++) { \
  151. if (tag2 == set[i]) { \
  152. for (j = i; j > 0; j--) { \
  153. set[j] = set[j - 1]; \
  154. } \
  155. set[0] = tag2; \
  156. goto miss_treatment; \
  157. } \
  158. } \
  159. for (j = L.assoc - 1; j > 0; j--) { \
  160. set[j] = set[j - 1]; \
  161. } \
  162. set[0] = tag2; \
  163. is_miss = True; \
  164. miss_treatment: \
  165. if (is_miss) { MISS_TREATMENT; } \
  166. \
  167. } else { \
  168. VG_(printf)("addr: %lx size: %u sets: %d %d", a, size, set1, set2);\
  169. VG_(tool_panic)("item straddles more than two cache sets"); \
  170. } \
  171. return; \
  172. }
  173. CACHESIM(LL, (*mL)++ );
  174. CACHESIM(I1, { (*m1)++; cachesim_LL_doref(a, size, m1, mL); } );
  175. CACHESIM(D1, { (*m1)++; cachesim_LL_doref(a, size, m1, mL); } );
  176. /*--------------------------------------------------------------------*/
  177. /*--- end cg_sim.c ---*/
  178. /*--------------------------------------------------------------------*/