PageRenderTime 27ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 1ms

/hphp/runtime/vm/jit/region-hot-cfg.cpp

https://gitlab.com/iranjith4/hhvm
C++ | 287 lines | 194 code | 40 blank | 53 comment | 30 complexity | a891b5b388ff62fde9f95e95438c962a MD5 | raw file
  1. /*
  2. +----------------------------------------------------------------------+
  3. | HipHop for PHP |
  4. +----------------------------------------------------------------------+
  5. | Copyright (c) 2010-2016 Facebook, Inc. (http://www.facebook.com) |
  6. +----------------------------------------------------------------------+
  7. | This source file is subject to version 3.01 of the PHP license, |
  8. | that is bundled with this package in the file LICENSE, and is |
  9. | available through the world-wide-web at the following url: |
  10. | http://www.php.net/license/3_01.txt |
  11. | If you did not receive a copy of the PHP license and are unable to |
  12. | obtain it through the world-wide-web, please send a note to |
  13. | license@php.net so we can mail you a copy immediately. |
  14. +----------------------------------------------------------------------+
  15. */
  16. #include <memory>
  17. #include <algorithm>
  18. #include "hphp/util/trace.h"
  19. #include "hphp/runtime/vm/jit/normalized-instruction.h"
  20. #include "hphp/runtime/vm/jit/prof-data.h"
  21. #include "hphp/runtime/vm/jit/region-prune-arcs.h"
  22. #include "hphp/runtime/vm/jit/region-selection.h"
  23. #include "hphp/runtime/vm/jit/target-profile.h"
  24. #include "hphp/runtime/vm/jit/trans-cfg.h"
  25. /*
  26. * This module supports the implementation of two region selectors: hotcfg and
  27. * wholecfg. In hotcfg mode, it constructs a region that is a maximal CFG
  28. * given the constraints for what is currently supported within a region and
  29. * the JitPGOMinBlockCountPercent and JitPGOMinArcProbability runtime options
  30. * (which can be used to prune cold/unlikely code). In wholecfg mode, these
  31. * two runtime options are ignored and nothing is pruned based on profile
  32. * counters.
  33. */
  34. namespace HPHP { namespace jit {
  35. TRACE_SET_MOD(pgo);
  36. namespace {
  // Profile name handed to the TargetProfile<SwitchProfile> constructor in
  // DFS::trimSwitchArcs() below.
  // NOTE(review): presumably this must match the name used by the code that
  // records the Switch profile -- confirm against the profiling side.
  37. const StaticString s_switchProfile("SwitchProfile");
  38. //////////////////////////////////////////////////////////////////////
  39. struct DFS {
  40. DFS(const ProfData* p, const TransCFG& c, int32_t maxBCInstrs, bool inlining)
  41. : m_profData(p)
  42. , m_cfg(c)
  43. , m_numBCInstrs(maxBCInstrs)
  44. , m_inlining(inlining)
  45. {}
  46. RegionDescPtr formRegion(TransID head) {
  47. m_region = std::make_shared<RegionDesc>();
  48. if (RuntimeOption::EvalJitPGORegionSelector == "wholecfg") {
  49. m_minBlockWeight = 0;
  50. m_minArcProb = 0;
  51. } else {
  52. auto const minBlkPerc = RuntimeOption::EvalJitPGOMinBlockCountPercent;
  53. m_minBlockWeight = minBlkPerc * m_cfg.weight(head) / 100.0;
  54. m_minArcProb = RuntimeOption::EvalJitPGOMinArcProbability;
  55. }
  56. ITRACE(3, "formRegion: starting at head = {} (weight = {})\n"
  57. " minBlockWeight = {:.2}\n"
  58. " minArcProb = {:.2}\n",
  59. head, m_cfg.weight(head), m_minBlockWeight, m_minArcProb);
  60. Trace::Indent indent;
  61. visit(head);
  62. for (auto& arc : m_arcs) {
  63. m_region->addArc(arc.src, arc.dst);
  64. }
  65. return m_region;
  66. }
  67. private:
  68. static constexpr int kMaxNonDefaultCases = 4;
  69. static constexpr int kMinSwitchPercent = 75;
  70. /*
  71. * Look up profiling data for the Switch at the end of tid and decide which
  72. * outgoing arcs, if any, to include in the region. Arcs that survive this
  73. * function may still be trimmed by the other checks in visit().
  74. */
  75. void trimSwitchArcs(TransID tid,
  76. const RegionDesc& profRegion,
  77. std::vector<TransCFG::Arc*>& arcs) {
  78. ITRACE(5, "Analyzing Switch ending profTrans {}\n", tid);
  79. Trace::Indent indent;
  80. auto sk = profRegion.blocks().back()->last();
  81. assert(sk.op() == OpSwitch);
  82. TargetProfile<SwitchProfile> profile(tid,
  83. TransKind::Optimize,
  84. sk.offset(),
  85. s_switchProfile.get());
  86. assert(!profile.profiling());
  87. if (!profile.optimizing()) {
  88. // We don't have profile data for this Switch, most likely because it saw
  89. // some weird input type during profiling.
  90. arcs.clear();
  91. return;
  92. }
  93. NormalizedInstruction ni{sk, sk.unit()};
  94. std::vector<Offset> offsets;
  95. for (auto off : ni.immVec.range32()) offsets.push_back(sk.offset() + off);
  96. auto const data = sortedSwitchProfile(profile, offsets.size());
  97. uint32_t totalHits = 0;
  98. for (auto const& item : data) totalHits += item.count;
  99. if (totalHits == 0) {
  100. // This switch was never executed during profiling.
  101. arcs.clear();
  102. return;
  103. }
  104. // Allow arcs if the hottest kMaxNonDefaultCases account for at least
  105. // kMinSwitchPercent % of total profiling hits.
  106. uint32_t includedCases = 0;
  107. uint32_t includedHits = 0;
  108. std::unordered_set<SrcKey, SrcKey::Hasher> allowedSks;
  109. for (auto const& item : data) {
  110. // We always have bounds checks for the default, so it doesn't count
  111. // against the case limit.
  112. if (item.caseIdx == data.size() - 1) {
  113. ITRACE(5, "Adding {} hits from default case @ {}\n",
  114. item.count, offsets[item.caseIdx]);
  115. includedHits += item.count;
  116. allowedSks.insert(SrcKey{sk, offsets[item.caseIdx]});
  117. continue;
  118. }
  119. if (includedCases == kMaxNonDefaultCases) {
  120. if (includedHits * 100 / totalHits < kMinSwitchPercent) {
  121. ITRACE(5, "Profile data not biased towards hot cases; bailing\n");
  122. arcs.clear();
  123. return;
  124. }
  125. break;
  126. }
  127. ITRACE(5, "Adding {} hits from case {} @ {}\n",
  128. item.count, item.caseIdx, offsets[item.caseIdx]);
  129. ++includedCases;
  130. includedHits += item.count;
  131. allowedSks.insert(SrcKey{sk, offsets[item.caseIdx]});
  132. }
  133. ITRACE(5, "Including {} cases, representing {} / {} samples\n",
  134. includedCases, includedHits, totalHits);
  135. auto firstDead = std::remove_if(
  136. begin(arcs), end(arcs), [&](const TransCFG::Arc* arc) {
  137. auto const rec = m_profData->transRec(arc->dst());
  138. const bool ok = allowedSks.count(rec->srcKey());
  139. ITRACE(5, "Arc {} -> {} {}included\n",
  140. arc->src(), arc->dst(), ok ? "" : "not ");
  141. return !ok;
  142. }
  143. );
  144. arcs.erase(firstDead, end(arcs));
  145. }
  146. void visit(TransID tid) {
  147. auto rec = m_profData->transRec(tid);
  148. auto tidRegion = rec->region();
  149. auto tidInstrs = tidRegion->instrSize();
  150. if (tidInstrs > m_numBCInstrs) {
  151. ITRACE(5, "- visit: skipping {} due to region size\n", tid);
  152. return;
  153. }
  154. // Skip tid if its weight is below the JitPGOMinBlockPercent
  155. // percentage of the weight of the block where this region
  156. // started.
  157. auto tidWeight = m_cfg.weight(tid);
  158. if (tidWeight < m_minBlockWeight) {
  159. ITRACE(5, "- visit: skipping {} due to low weight ({})\n",
  160. tid, tidWeight);
  161. return;
  162. }
  163. if (!m_visited.insert(tid).second) return;
  164. m_visiting.insert(tid);
  165. m_numBCInstrs -= tidInstrs;
  166. ITRACE(5, "- visit: adding {} ({})\n", tid, tidWeight);
  167. auto const termSk = rec->lastSrcKey();
  168. auto const termOp = termSk.op();
  169. if (!breaksRegion(termSk)) {
  170. auto srcBlockId = tidRegion->blocks().back().get()->id();
  171. auto arcs = m_cfg.outArcs(tid);
  172. // We have special profiling and logic to decide which arcs from a Switch
  173. // are eligible for inclusion in the region.
  174. if (termOp == OpSwitch) {
  175. trimSwitchArcs(srcBlockId, *tidRegion, arcs);
  176. }
  177. for (auto const arc : arcs) {
  178. auto dst = arc->dst();
  179. // Skip if the probability of taking this arc is below the specified
  180. // threshold.
  181. if (arc->weight() < m_minArcProb * tidWeight) {
  182. ITRACE(5, "- visit: skipping arc {} -> {} due to low probability "
  183. "({:.2})\n", tid, dst, arc->weight() / (tidWeight + 0.001));
  184. continue;
  185. }
  186. // Skip dst if we already generated a region starting at that SrcKey.
  187. auto dstRec = m_profData->transRec(dst);
  188. auto dstSK = dstRec->srcKey();
  189. if (!m_inlining && m_profData->optimized(dstSK)) {
  190. ITRACE(5, "- visit: skipping {} because SrcKey was already "
  191. "optimize", showShort(dstSK));
  192. continue;
  193. }
  194. always_assert(dst == dstRec->region()->entry()->id());
  195. visit(dst);
  196. // Record the arc if dstBlockId was included in the region. (Note that
  197. // it may not be included in the region due to the
  198. // EvalJitMaxRegionInstrs limit.)
  199. if (m_visited.count(dst)) {
  200. m_arcs.push_back({srcBlockId, dst});
  201. }
  202. }
  203. }
  204. // Now insert the region for tid in the front of m_region. We do
  205. // this last so that the region ends up in (quasi-)topological order
  206. // (it'll be in topological order for acyclic regions).
  207. m_region->prepend(*tidRegion);
  208. always_assert(m_numBCInstrs >= 0);
  209. m_visiting.erase(tid);
  210. }
  211. private:
  212. const ProfData* m_profData;
  213. const TransCFG& m_cfg;
  214. RegionDescPtr m_region;
  215. int32_t m_numBCInstrs;
  216. jit::hash_set<TransID> m_visiting;
  217. jit::hash_set<TransID> m_visited;
  218. jit::vector<RegionDesc::Arc> m_arcs;
  219. double m_minBlockWeight;
  220. double m_minArcProb;
  221. bool m_inlining;
  222. };
  223. //////////////////////////////////////////////////////////////////////
  224. }
  225. RegionDescPtr selectHotCFG(HotTransContext& ctx) {
  226. ITRACE(1, "selectHotCFG: starting with maxBCInstrs = {}\n", ctx.maxBCInstrs);
  227. auto const region =
  228. DFS(ctx.profData, *ctx.cfg, ctx.maxBCInstrs, ctx.inlining)
  229. .formRegion(ctx.tid);
  230. if (region->empty()) return nullptr;
  231. ITRACE(3, "selectHotCFG: before region_prune_arcs:\n{}\n",
  232. show(*region));
  233. region_prune_arcs(*region, ctx.inputTypes);
  234. ITRACE(3, "selectHotCFG: before chainRetransBlocks:\n{}\n",
  235. show(*region));
  236. region->chainRetransBlocks();
  237. // Relax the region guards.
  238. if (RuntimeOption::EvalRegionRelaxGuards) {
  239. ITRACE(3, "selectHotCFG: before optimizeProfiledGuards:\n{}\n",
  240. show(*region));
  241. optimizeProfiledGuards(*region, *ctx.profData);
  242. }
  243. ITRACE(1, "selectHotCFG: final version after optimizeProfiledGuards:\n{}\n",
  244. show(*region));
  245. return region;
  246. }
  247. }}