PageRenderTime 31ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 0ms

/hphp/runtime/vm/jit/vasm-layout.cpp

https://gitlab.com/alvinahmadov2/hhvm
C++ | 419 lines | 283 code | 66 blank | 70 comment | 48 complexity | cf764a5c76c92a6b8111842a21a6e39c MD5 | raw file
  1. /*
  2. +----------------------------------------------------------------------+
  3. | HipHop for PHP |
  4. +----------------------------------------------------------------------+
  5. | Copyright (c) 2010-2015 Facebook, Inc. (http://www.facebook.com) |
  6. +----------------------------------------------------------------------+
  7. | This source file is subject to version 3.01 of the PHP license, |
  8. | that is bundled with this package in the file LICENSE, and is |
  9. | available through the world-wide-web at the following url: |
  10. | http://www.php.net/license/3_01.txt |
  11. | If you did not receive a copy of the PHP license and are unable to |
  12. | obtain it through the world-wide-web, please send a note to |
  13. | license@php.net so we can mail you a copy immediately. |
  14. +----------------------------------------------------------------------+
  15. */
  16. #include <folly/MapUtil.h>
  17. #include "hphp/util/trace.h"
  18. #include "hphp/runtime/vm/jit/containers.h"
  19. #include "hphp/runtime/vm/jit/mc-generator.h"
  20. #include "hphp/runtime/vm/jit/prof-data.h"
  21. #include "hphp/runtime/vm/jit/timer.h"
  22. #include "hphp/runtime/vm/jit/vasm.h"
  23. #include "hphp/runtime/vm/jit/vasm-instr.h"
  24. #include "hphp/runtime/vm/jit/vasm-print.h"
  25. #include "hphp/runtime/vm/jit/vasm-unit.h"
  26. #include "hphp/runtime/vm/jit/vasm-visit.h"
  27. #include <boost/dynamic_bitset.hpp>
  28. #include <algorithm>
  29. /*
  30. * This module implements two code layout strategies for sorting the
  31. * Vasm blocks:
  32. *
  33. * 1) rpoLayout() implements a simple layout that sorts the blocks in
  34. * reverse post-order. The final list of blocks is also
  35. * partitioned so that any blocks assigned to the Main code area
  36. * appear before the blocks assigned to the Cold area, which in
  37. * turn appear before all blocks assigned to the Frozen area.
  38. * This method is used when no profiling information is available.
  39. *
  40. * 2) pgoLayout() is enabled for Optimize, PGO-based regions. This
  41. * implements the algorithm described in "Profile Guided Code
  42. * Positioning" (PLDI'1990) by Pettis & Hansen (more specifically,
  43. * Algo2, from section 4.2.1). This implementation uses estimated
  44. * arc weights derived from a combination of profile counters
  45. * inserted at the bytecode-level blocks (in Profile translations)
  46. * and the JIT-time Likely/Unlikely/Unused hints (encoded in the
  47. * "area" field of Vblocks).
  48. *
  49. */
  50. namespace HPHP { namespace jit {
  51. ///////////////////////////////////////////////////////////////////////////////
  52. namespace {
  53. TRACE_SET_MOD(layout);
  54. ///////////////////////////////////////////////////////////////////////////////
  55. jit::vector<Vlabel> rpoLayout(const Vunit& unit) {
  56. auto blocks = sortBlocks(unit);
  57. // Partition into main/cold/frozen areas without changing relative order, and
  58. // the end{} block will be last.
  59. auto coldIt = std::stable_partition(blocks.begin(), blocks.end(),
  60. [&](Vlabel b) {
  61. return unit.blocks[b].area == AreaIndex::Main &&
  62. unit.blocks[b].code.back().op != Vinstr::fallthru;
  63. });
  64. std::stable_partition(coldIt, blocks.end(),
  65. [&](Vlabel b) {
  66. return unit.blocks[b].area == AreaIndex::Cold &&
  67. unit.blocks[b].code.back().op != Vinstr::fallthru;
  68. });
  69. return blocks;
  70. }
  71. ///////////////////////////////////////////////////////////////////////////////
  72. /**
  73. * This keeps track of the weights of blocks and arcs in a Vunit.
  74. */
  75. struct Scale {
  76. explicit Scale(const Vunit& unit)
  77. : m_unit(unit)
  78. , m_blocks(sortBlocks(unit))
  79. , m_preds(computePreds(unit)) {
  80. computeWeights();
  81. }
  82. int64_t weight(Vlabel blk) const;
  83. int64_t weight(Vlabel src, Vlabel dst) const;
  84. std::string toString() const;
  85. private:
  86. void computeWeights();
  87. void computeBlockWeights();
  88. void computeArcWeights();
  89. TransID findProfTransID(Vlabel blk) const;
  90. int64_t findProfCount(Vlabel blk) const;
  91. static uint64_t arcId(Vlabel src, Vlabel dst) { return (src << 32) + dst; }
  92. const Vunit& m_unit;
  93. const jit::vector<Vlabel> m_blocks;
  94. const PredVector m_preds;
  95. jit::vector<int64_t> m_blkWgts;
  96. jit::hash_map<uint64_t, int64_t> m_arcWgts; // keyed using arcId()
  97. };
  98. int64_t Scale::weight(Vlabel blk) const {
  99. return m_blkWgts[blk];
  100. }
  101. int64_t Scale::weight(Vlabel src, Vlabel dst) const {
  102. return folly::get_default(m_arcWgts, arcId(src, dst), 0);
  103. }
  104. TransID Scale::findProfTransID(Vlabel blk) const {
  105. for (auto& i : m_unit.blocks[blk].code) {
  106. if (!i.origin) continue;
  107. auto profTransID = i.origin->marker().profTransID();
  108. if (profTransID == kInvalidTransID) continue;
  109. return profTransID;
  110. }
  111. return kInvalidTransID;
  112. }
  113. int64_t Scale::findProfCount(Vlabel blk) const {
  114. for (auto& i : m_unit.blocks[blk].code) {
  115. if (i.origin) {
  116. return i.origin->block()->profCount();
  117. }
  118. }
  119. return 1;
  120. }
  121. void Scale::computeBlockWeights() {
  122. m_blkWgts.resize(m_unit.blocks.size(), 0);
  123. // We divide the corresponding region block's profile counter by the
  124. // following factors, depending on the code area the block is
  125. // assigned to.
  126. static int areaWeightFactors[] = { 1, /* main */
  127. 10, /* cold */
  128. 100 /* frozen */ };
  129. static_assert(
  130. sizeof(areaWeightFactors) / sizeof(areaWeightFactors[0]) == kNumAreas,
  131. "need to update areaWeightFactors");
  132. for (auto b : m_blocks) {
  133. auto a = unsigned(m_unit.blocks[b].area);
  134. assertx(a < 3);
  135. m_blkWgts[b] = findProfCount(b) / areaWeightFactors[a];
  136. }
  137. }
  138. void Scale::computeArcWeights() {
  139. for (auto b : m_blocks) {
  140. auto succSet = succs(m_unit.blocks[b]);
  141. for (auto s : succSet) {
  142. // If the arc is non-critical, we can figure out its weight by
  143. // looking at its incident blocks. For critical arcs, we
  144. // currently just approximate it as half of the smallest weight
  145. // of its incident blocks.
  146. auto arcid = arcId(b, s);
  147. m_arcWgts[arcid] = succSet.size() == 1 ? weight(b)
  148. : m_preds[s].size() == 1 ? weight(s)
  149. : std::min(weight(b), weight(s)) / 2;
  150. FTRACE(3, "arc({} -> {}) => weight = {} "
  151. "[|succs(b)| = {} ; |preds(s)| = {}] "
  152. "[weight(b) = {} ; weight(s) = {}]\n", b, s, m_arcWgts[arcid],
  153. succSet.size(), m_preds[s].size(), weight(b), weight(s));
  154. }
  155. }
  156. }
  157. void Scale::computeWeights() {
  158. computeBlockWeights();
  159. computeArcWeights();
  160. }
  161. std::string Scale::toString() const {
  162. std::ostringstream out;
  163. out << "digraph {\n";
  164. int64_t maxWgt = 1;
  165. for (auto b : m_blocks) {
  166. maxWgt = std::max(maxWgt, weight(b));
  167. }
  168. for (auto b : m_blocks) {
  169. unsigned coldness = 255 - (255 * weight(b) / maxWgt);
  170. out << folly::format(
  171. "{} [label=\"{}\\nw: {}\\nptid: {}\\narea: {}\\nprof: {}\","
  172. "shape=box,style=filled,fillcolor=\"#ff{:02x}{:02x}\"]\n",
  173. b, b, weight(b), findProfTransID(b), unsigned(m_unit.blocks[b].area),
  174. findProfCount(b), coldness, coldness);
  175. for (auto s : succs(m_unit.blocks[b])) {
  176. out << folly::format("{} -> {} [label={}];\n", b, s, weight(b, s));
  177. }
  178. }
  179. out << "}\n";
  180. return out.str();
  181. }
  182. ///////////////////////////////////////////////////////////////////////////////
  183. struct Clusterizer {
  184. Clusterizer(const Vunit& unit, const Scale& scale)
  185. : m_unit(unit)
  186. , m_scale(scale)
  187. , m_blocks(sortBlocks(unit)) {
  188. initClusters();
  189. clusterize();
  190. sortClusters();
  191. FTRACE(1, "{}", toString());
  192. }
  193. jit::vector<Vlabel> getBlockList() const;
  194. private:
  195. using Cluster = jit::vector<Vlabel>;
  196. void initClusters();
  197. void clusterize();
  198. void sortClusters();
  199. std::string toString() const;
  200. const Vunit& m_unit;
  201. const Scale& m_scale;
  202. const jit::vector<Vlabel> m_blocks;
  203. jit::vector<Cluster> m_clusters;
  204. jit::vector<Vlabel> m_blockCluster; // maps block to current cluster
  205. jit::vector<Vlabel> m_clusterOrder; // final sorted list of cluster ids
  206. };
  207. jit::vector<Vlabel> Clusterizer::getBlockList() const {
  208. jit::vector<Vlabel> list;
  209. for (auto cid : m_clusterOrder) {
  210. for (auto b : m_clusters[cid]) {
  211. list.push_back(b);
  212. }
  213. }
  214. return list;
  215. }
  216. std::string Clusterizer::toString() const {
  217. std::ostringstream out;
  218. out << "clusterize: final clusters:\n";
  219. for (auto cid : m_clusterOrder) {
  220. out << folly::sformat(" - cluster {}: ", cid);
  221. for (auto b : m_clusters[cid]) {
  222. out << folly::sformat("{}, ", b);
  223. }
  224. out << "\n";;
  225. }
  226. return out.str();
  227. }
  228. void Clusterizer::initClusters() {
  229. m_clusters.resize(m_unit.blocks.size());
  230. m_blockCluster.resize(m_unit.blocks.size());
  231. for (auto b : m_blocks) {
  232. m_clusters[b].push_back(b);
  233. m_blockCluster[b] = b;
  234. }
  235. }
  236. void Clusterizer::clusterize() {
  237. struct ArcInfo {
  238. Vlabel src;
  239. Vlabel dst;
  240. int64_t wgt;
  241. };
  242. jit::vector<ArcInfo> arcInfos;
  243. for (auto b : m_blocks) {
  244. for (auto s : succs(m_unit.blocks[b])) {
  245. arcInfos.push_back({b, s, m_scale.weight(b, s)});
  246. }
  247. }
  248. // sort arcs in decreasing weight order
  249. std::sort(arcInfos.begin(), arcInfos.end(),
  250. [&](const ArcInfo& a1, const ArcInfo& a2) {
  251. return a1.wgt > a2.wgt;
  252. });
  253. for (auto& arcInfo : arcInfos) {
  254. auto src = arcInfo.src;
  255. auto dst = arcInfo.dst;
  256. auto srcCid = m_blockCluster[src];
  257. auto dstCid = m_blockCluster[dst];
  258. if (srcCid == dstCid) continue;
  259. auto& srcC = m_clusters[srcCid];
  260. auto& dstC = m_clusters[dstCid];
  261. // src must be the last in its cluster
  262. if (srcC.back() != src) continue;
  263. // dst must be the first in its cluster
  264. if (dstC.front() != dst) continue;
  265. // merge the clusters by append the blocks in dstC to srcC
  266. for (auto d : dstC) {
  267. srcC.push_back(d);
  268. m_blockCluster[d] = srcCid;
  269. }
  270. dstC.clear();
  271. }
  272. }
  273. using SuccInfos = jit::hash_map<uint32_t, int64_t>; // cluster id => weight
  274. struct DFSSortClusters {
  275. DFSSortClusters(const jit::vector<SuccInfos>&& succInfos, const Vunit& unit)
  276. : m_clusterSuccs(succInfos)
  277. , m_visited(unit.blocks.size()) { }
  278. jit::vector<Vlabel> sort(uint32_t initialCid);
  279. private:
  280. void dfs(uint32_t cid);
  281. jit::vector<SuccInfos> m_clusterSuccs;
  282. boost::dynamic_bitset<> m_visited;
  283. jit::vector<Vlabel> m_list;
  284. };
  285. void DFSSortClusters::dfs(uint32_t cid) {
  286. if (m_visited.test(cid)) return;
  287. m_visited.set(cid);
  288. m_list.push_back(Vlabel(cid));
  289. // find the best successor, which is the one to which cid has the
  290. // highest weight among the ones that haven't been visited yet
  291. int64_t maxWgt = 0;
  292. uint32_t bestSucc = uint32_t(-1);
  293. for (auto& sInfo : m_clusterSuccs[cid]) {
  294. auto succId = sInfo.first;
  295. if (m_visited.test(succId)) continue;
  296. auto wgt = sInfo.second;
  297. if (wgt >= maxWgt) {
  298. maxWgt = wgt;
  299. bestSucc = succId;
  300. }
  301. }
  302. if (bestSucc == uint32_t(-1)) return;
  303. // visit bestSucc first
  304. dfs(bestSucc);
  305. // now visit the remaining ones
  306. for (auto& sInfo : m_clusterSuccs[cid]) {
  307. if (sInfo.first != bestSucc) {
  308. dfs(sInfo.first);
  309. }
  310. }
  311. }
  312. jit::vector<Vlabel> DFSSortClusters::sort(uint32_t initialCid) {
  313. dfs(initialCid);
  314. return std::move(m_list);
  315. }
  316. /*
  317. * This method creates a weighted graph of the clusters, and sorts
  318. * them according to a DFS pre-order that prioritizes the arcs with
  319. * heaviest weights, so as to try to have a cluster be followed by its
  320. * most likely successor cluster.
  321. */
  322. void Clusterizer::sortClusters() {
  323. jit::vector<SuccInfos> clusterGraph;
  324. clusterGraph.resize(m_unit.blocks.size());
  325. for (auto b : m_blocks) {
  326. for (auto s : succs(m_unit.blocks[b])) {
  327. auto srcCid = m_blockCluster[b];
  328. auto dstCid = m_blockCluster[s];
  329. if (srcCid == dstCid) continue;
  330. auto wgt = m_scale.weight(b, s);
  331. clusterGraph[srcCid][dstCid] += wgt;
  332. }
  333. }
  334. DFSSortClusters dfsSort(std::move(clusterGraph), m_unit);
  335. m_clusterOrder = dfsSort.sort(m_blockCluster[m_unit.entry]);
  336. }
  337. ///////////////////////////////////////////////////////////////////////////////
  338. jit::vector<Vlabel> pgoLayout(const Vunit& unit) {
  339. // compute block & arc weights
  340. Scale scale(unit);
  341. FTRACE(1, "profileGuidedLayout: Weighted CFG:\n{}\n", scale.toString());
  342. // cluster the blocks based on weights and sort the clusters
  343. Clusterizer clusterizer(unit, scale);
  344. return clusterizer.getBlockList();
  345. }
  346. }
  347. ///////////////////////////////////////////////////////////////////////////////
  348. jit::vector<Vlabel> layoutBlocks(const Vunit& unit) {
  349. Timer timer(Timer::vasm_layout);
  350. return mcg->tx().mode() == TransKind::Optimize ? pgoLayout(unit)
  351. : rpoLayout(unit);
  352. }
  353. } }