PageRenderTime 62ms CodeModel.GetById 17ms RepoModel.GetById 1ms app.codeStats 0ms

/hphp/hhbbc/dce.cpp

https://gitlab.com/Blueprint-Marketing/hhvm
C++ | 1026 lines | 573 code | 115 blank | 338 comment | 71 complexity | 899dc962ec2bf32cb08ed74843e6d1c1 MD5 | raw file
  1. /*
  2. +----------------------------------------------------------------------+
  3. | HipHop for PHP |
  4. +----------------------------------------------------------------------+
  5. | Copyright (c) 2010-2014 Facebook, Inc. (http://www.facebook.com) |
  6. +----------------------------------------------------------------------+
  7. | This source file is subject to version 3.01 of the PHP license, |
  8. | that is bundled with this package in the file LICENSE, and is |
  9. | available through the world-wide-web at the following url: |
  10. | http://www.php.net/license/3_01.txt |
  11. | If you did not receive a copy of the PHP license and are unable to |
  12. | obtain it through the world-wide-web, please send a note to |
  13. | license@php.net so we can mail you a copy immediately. |
  14. +----------------------------------------------------------------------+
  15. */
  16. #include "hphp/hhbbc/dce.h"
  17. #include <vector>
  18. #include <string>
  19. #include <utility>
  20. #include <bitset>
  21. #include <sstream>
  22. #include <algorithm>
  23. #include <set>
  24. #include <boost/dynamic_bitset.hpp>
  25. #include <folly/gen/Base.h>
  26. #include <folly/gen/String.h>
  27. #include "hphp/util/trace.h"
  28. #include "hphp/util/dataflow-worklist.h"
  29. #include "hphp/hhbbc/representation.h"
  30. #include "hphp/hhbbc/analyze.h"
  31. #include "hphp/hhbbc/interp-state.h"
  32. #include "hphp/hhbbc/interp.h"
  33. #include "hphp/hhbbc/type-system.h"
  34. #include "hphp/hhbbc/unit-util.h"
  35. #include "hphp/hhbbc/cfg.h"
  36. namespace HPHP { namespace HHBBC {
  37. TRACE_SET_MOD(hhbbc_dce);
  38. //////////////////////////////////////////////////////////////////////
  39. /*
  40. * This module contains code to perform both local DCE and global DCE.
  41. *
  42. * The local DCE algorithm addresses dead eval stack manipulations and
  43. * dead stores to locals that are visible within a single basic block.
  44. *
  45. * The global DCE performs a liveness analysis and then uses this
  46. * information to allow dead stores to locals to be eliminated across
  47. * blocks. It does not attempt to remove unnecessary evaluation stack
  48. * manipulation spanning basic blocks, but it uses the same local DCE
  49. * code and will eliminate intra-block stack manipulations.
  50. *
  51. * Both types of DCE here need to be type-aware, but they must visit
  52. * blocks backward. They accomplish this by forward-propagating the
  53. * block input states from a FuncAnalysis to each instruction in the
  54. * block prior to doing the backward iteration.
  55. *
  56. * Eval stack:
  57. *
  58. * During backward traversal of a block, we maintain a "backwards"
  59. * stack, indicating which eval stack slots are going to be required
  60. * in the future or not.
  61. *
  62. * During this traversal, each instruction that pops when going
  63. * forward instead "pushes" information about whether that input
  64. * will be required. If it is not required, it also pushes an
  65. * accumulating set of instruction ids that must be removed if the
  66. * instruction which produces the stack slot is removed. (All
  67. * instructions in these sets must be removed if any are, in order
  68. * to keep the stack depths correct.)
  69. *
  70. * Similarly, each instruction that would push a value going forward
  71. * instead "pops" the information about whether its stack output is
  72. * going to be needed. If not, the instruction can mark itself (and
  73. * all downstream instructions that depended on it) as removable.
  74. *
  75. * Locals:
  76. *
  77. * While a block is iterated backward, the set of live locals is
  78. * tracked. The initial state of this live set depends on whether
  79. * we are performing global or local DCE, and in the local case
  80. * includes all locals in the function.
  81. *
  82. * When a local may be read, it is added to the live set. When a
  83. * local is definitely-written, it is removed from the set.
  84. *
  85. * If an instruction may write to a local that is not live, it can be
  86. * marked as removable if it is known to have no other side-effects.
  87. * Currently this is only hooked up to SetL.
  88. *
  89. * Liveness analysis:
  90. *
  91. * The global algorithm first performs a liveness analysis to
  92. * propagate live out sets to each block.
  93. *
  94. * This analysis is basically normal, but slightly modified from the
  95. * usual in order to deal with the way exceptional control flow is
  96. * represented in our CFG (with factored edges).
  97. *
  98. * It essentially is the liveness analysis algorithm described at
  99. * http://dl.acm.org/citation.cfm?id=316171, except that we don't
  100. * need to track kill sets for each PEI because we don't have a
  101. * means of determining which factored edges may be traversed by any
  102. * given PEI. (Maybe that may be more usual to have in the context
  103. * of a language with declared exception clauses...)
  104. *
  105. * Since we only deal with the most pessimistic exception case, this
  106. * means for each block we just determine a gen and kill set, along
  107. * with a subset of the latter set that is the locals that must be
  108. * killed before any PEI. (See killBeforePEI below.)
  109. *
  110. * Final note about types:
  111. *
  112. * Global DCE can change the types of locals in a way that spans
  113. * basic blocks. For a simple example, take the following code:
  114. *
  115. * // $foo :: Uninit here.
  116. * $foo = 12;
  117. * // ... code that breaks a block ...
  118. * $foo = 100;
  119. *
  120. * If the first store to $foo is removed, the type of $foo before
  121. * the SetL in the later block is now Uninit, instead of Int.
  122. *
  123. * This means that after calling global_dce on a function, the type
  124. * information in the block input states in the associated
  125. * FuncAnalysis can no longer be considered accurate.
  126. *
  127. * Moreover, since global DCE makes use of type information to
  128. * determine whether a store is dead, we need to be careful that
  129. * this never changes whether the assumptions used to perform DCE
  130. * were correct.
  131. *
  132. * This is ok right now: DCE only uses the types to figure out which
  133. * values can either have lifetimes extended or shortened without
  134. * visible side-effects, or which values may be refs (so we can't
  135. * omit stores to them). If we omit a store that changes the type
  136. * of a local globally, this means the new type cannot be different
  137. * with regard to these features (or we wouldn't have omitted it),
  138. * which means we won't have made different decisions elsewhere in
  139. * the algorithm based on the new type.
  140. *
  141. * Specifically: we will never omit a store where the old local type
  142. * was something that could've had side effects, and if a new value
  143. * is stored into a local where destroying it could have
  144. * side-effects, some point along the path to the function exit (if
  145. * nothing else, the RetC) will have added it to the gen set and the
  146. * store also won't be removable.
  147. *
  148. * In contrast, note that local DCE can not change types across
  149. * block boundaries.
  150. *
  151. */
  152. namespace {
  153. //////////////////////////////////////////////////////////////////////
  154. // Returns whether decrefing a type could run a destructor.
  155. bool couldRunDestructor(const Type& t) {
  156. // We could check for specialized objects to see if they don't
  157. // declare a user-defined destructor, but currently don't.
  158. return t.couldBe(TObj) || t.couldBe(TCArr) || t.couldBe(TRef);
  159. }
  160. // Returns whether a set on something containing type t could have
  161. // side-effects (running destuctors, or modifying arbitrary things via
  162. // a Ref).
  163. bool setCouldHaveSideEffects(const Type& t) {
  164. return t.couldBe(TObj) || t.couldBe(TCArr) || t.couldBe(TRef);
  165. }
  166. // Some reads could raise warnings and run arbitrary code.
  167. bool readCouldHaveSideEffects(const Type& t) {
  168. return t.couldBe(TUninit);
  169. }
  170. //////////////////////////////////////////////////////////////////////
  171. /*
  172. * Use information of a stack cell.
  173. */
  174. enum class Use {
  175. // Indicates that the cell is (unconditionally) not used.
  176. Not,
  177. // Indicates that the cell is (possibly) used.
  178. Used,
  179. /*
  180. * Indicates that the cell is only used if it was the last reference alive.
  181. * For instance, a PopC will call the destructor of the top-of-stack object
  182. * if it was the last reference alive, and this counts as an example of
  183. * 'UsedIfLastRef'.
  184. *
  185. * If the producer of the cell knows that it is not the last reference, then
  186. * it can treat Use::UsedIfLastRef as being equivalent to Use::Not.
  187. */
  188. UsedIfLastRef,
  189. };
  190. using InstrId = size_t;
  191. using InstrIdSet = std::set<InstrId>;
  192. using UseInfo = std::pair<Use,InstrIdSet>;
  193. //////////////////////////////////////////////////////////////////////
  194. struct DceState {
  195. borrowed_ptr<const php::Func> func;
  196. /*
  197. * Eval stack use information. Stacks slots are marked as being
  198. * needed or not needed. If they aren't needed, they carry a set of
  199. * instructions that must be removed if the instruction that
  200. * produces the stack value is also removable.
  201. */
  202. std::vector<UseInfo> stack;
  203. /*
  204. * Locals known to be live at a point in a DCE walk. This is used
  205. * when we're actually acting on information we discovered during
  206. * liveness analysis.
  207. */
  208. std::bitset<kMaxTrackedLocals> liveLocals;
  209. /*
  210. * These variable sets are used to compute the transfer function for
  211. * the global liveness analysis in global_dce.
  212. *
  213. * The gen set accumulates the set of variables in the block with
  214. * upward-exposed uses. The kill set is the set of variables the
  215. * block will re-define, ignoring exceptional control flow.
  216. *
  217. * The killBeforePEI set is the set of variables killed before a
  218. * PEI. Propagation of liveness needs to use this (always more
  219. * conservative) set instead of kill when crossing a factored exit
  220. * edge.
  221. */
  222. std::bitset<kMaxTrackedLocals> gen;
  223. std::bitset<kMaxTrackedLocals> kill;
  224. std::bitset<kMaxTrackedLocals> killBeforePEI;
  225. /*
  226. * Instructions marked in this set are dead. If any of them are
  227. * removed, however, they must all be removed, because of the need
  228. * to keep eval stack consumers and producers balanced.
  229. */
  230. boost::dynamic_bitset<> markedDead{};
  231. /*
  232. * The set of locals that were ever live in this block. (This
  233. * includes locals that were live going out of this block.) This
  234. * set is used by global DCE to remove locals that are completely
  235. * unused in the entire function.
  236. */
  237. std::bitset<kMaxTrackedLocals> usedLocals;
  238. };
  239. //////////////////////////////////////////////////////////////////////
  240. const char* show(Use u) {
  241. switch (u) {
  242. case Use::Not: return "0";
  243. case Use::Used: return "U";
  244. case Use::UsedIfLastRef: return "UL";
  245. }
  246. not_reached();
  247. }
  248. std::string show(const InstrIdSet& set) {
  249. using namespace folly::gen;
  250. return from(set)
  251. | eachTo<std::string>()
  252. | unsplit<std::string>(";")
  253. ;
  254. }
  255. std::string show(const UseInfo& ui) {
  256. return folly::format("{}@{}", show(ui.first), show(ui.second)).str();
  257. }
  258. std::string bits_string(borrowed_ptr<const php::Func> func,
  259. std::bitset<kMaxTrackedLocals> locs) {
  260. std::ostringstream out;
  261. if (func->locals.size() < kMaxTrackedLocals) {
  262. for (auto i = func->locals.size(); i-- > 0;) {
  263. out << (locs.test(i) ? '1' : '0');
  264. }
  265. } else {
  266. out << locs;
  267. }
  268. return out.str();
  269. }
  270. //////////////////////////////////////////////////////////////////////
  271. struct Env {
  272. DceState& dceState;
  273. InstrId id;
  274. const State& stateBefore;
  275. const StepFlags& flags;
  276. };
  277. void markSetDead(Env& env, const InstrIdSet& set) {
  278. env.dceState.markedDead[env.id] = 1;
  279. FTRACE(2, " marking {} {}\n", env.id, show(set));
  280. for (auto& i : set) env.dceState.markedDead[i] = 1;
  281. }
  282. void markDead(Env& env) {
  283. env.dceState.markedDead[env.id] = 1;
  284. FTRACE(2, " marking {}\n", env.id);
  285. }
  286. //////////////////////////////////////////////////////////////////////
  287. // eval stack
  288. void pop(Env& env, Use u, InstrIdSet set) {
  289. FTRACE(2, " pop({})\n", show(u));
  290. env.dceState.stack.emplace_back(u, std::move(set));
  291. }
  292. void pop(Env& env) { pop(env, Use::Used, InstrIdSet{}); }
  293. Type topT(Env& env, uint32_t idx = 0) {
  294. assert(idx < env.stateBefore.stack.size());
  295. return env.stateBefore.stack[env.stateBefore.stack.size() - idx - 1];
  296. }
  297. Type topC(Env& env, uint32_t idx = 0) {
  298. auto const t = topT(env, idx);
  299. assert(t.subtypeOf(TInitCell));
  300. return t;
  301. }
  302. void discard(Env& env) {
  303. pop(env, Use::Not, InstrIdSet{env.id});
  304. }
  305. bool allUnused() { return true; }
  306. template<class... Args>
  307. bool allUnused(const UseInfo& ui, Args&&... args) {
  308. return ui.first == Use::Not &&
  309. allUnused(std::forward<Args>(args)...);
  310. }
  311. void combineSets(InstrIdSet&) {}
  312. template<class... Args>
  313. void combineSets(InstrIdSet& accum, const UseInfo& ui, Args&&... args) {
  314. accum.insert(begin(ui.second), end(ui.second));
  315. combineSets(accum, std::forward<Args>(args)...);
  316. }
  317. // If all the supplied UseInfos represent unused stack slots, make a
  318. // pop that is considered unused. Otherwise pop as a Use::Used.
  319. template<class... Args>
  320. void popCond(Env& env, Args&&... args) {
  321. bool unused = allUnused(std::forward<Args>(args)...);
  322. if (!unused) return pop(env, Use::Used, InstrIdSet{});
  323. auto accum = InstrIdSet{env.id};
  324. combineSets(accum, std::forward<Args>(args)...);
  325. pop(env, Use::Not, accum);
  326. }
  327. /*
  328. * It may be ok to remove pops on objects with destructors in some scenarios
  329. * (where it won't change the observable point at which a destructor runs). We
  330. * could also look at the object type and see if it is known that it can't have
  331. * a user-defined destructor.
  332. *
  333. * For now, we mark the cell popped with a Use::UsedIfLastRef. This indicates
  334. * to the producer of the cell that the it is considered used if it could be
  335. * the last reference alive (in which case the destructor would be run on
  336. * Pop). If the producer knows that the cell is not the last reference (e.g. if
  337. * it is a Dup), then Use:UsedIfLastRef is equivalent to Use::Not.
  338. */
  339. void discardNonDtors(Env& env) {
  340. auto const t = topC(env);
  341. if (couldRunDestructor(t)) {
  342. return pop(env, Use::UsedIfLastRef, InstrIdSet{env.id});
  343. }
  344. discard(env);
  345. }
  346. UseInfo push(Env& env) {
  347. always_assert(!env.dceState.stack.empty());
  348. auto ret = env.dceState.stack.back();
  349. env.dceState.stack.pop_back();
  350. FTRACE(2, " {}@{} = push()\n", show(ret.first), show(ret.second));
  351. return ret;
  352. }
  353. void pushRemovable(Env& env) {
  354. auto const ui = push(env);
  355. switch (ui.first) {
  356. case Use::Not:
  357. markSetDead(env, ui.second);
  358. break;
  359. case Use::Used:
  360. case Use::UsedIfLastRef:
  361. break;
  362. }
  363. }
  364. //////////////////////////////////////////////////////////////////////
  365. // locals
  366. void addGenSet(Env& env, std::bitset<kMaxTrackedLocals> locs) {
  367. FTRACE(4, " conservative: {}\n", bits_string(env.dceState.func, locs));
  368. env.dceState.liveLocals |= locs;
  369. env.dceState.gen |= locs;
  370. env.dceState.kill &= ~locs;
  371. env.dceState.killBeforePEI &= ~locs;
  372. }
  373. void addGen(Env& env, uint32_t id) {
  374. FTRACE(2, " gen: {}\n", id);
  375. if (id >= kMaxTrackedLocals) return;
  376. env.dceState.liveLocals[id] = 1;
  377. env.dceState.gen[id] = 1;
  378. env.dceState.kill[id] = 0;
  379. env.dceState.killBeforePEI[id] = 0;
  380. }
  381. void addKill(Env& env, uint32_t id) {
  382. FTRACE(2, " kill: {}\n", id);
  383. if (id >= kMaxTrackedLocals) return;
  384. env.dceState.liveLocals[id] = 0;
  385. env.dceState.gen[id] = 0;
  386. env.dceState.kill[id] = 1;
  387. env.dceState.killBeforePEI[id] = 1;
  388. }
  389. bool isLive(Env& env, uint32_t id) {
  390. if (id >= kMaxTrackedLocals) {
  391. // Conservatively assume it's potentially live.
  392. return true;
  393. }
  394. return env.dceState.liveLocals[id];
  395. }
  396. Type locRaw(Env& env, borrowed_ptr<php::Local> loc) {
  397. return env.stateBefore.locals[loc->id];
  398. }
  399. void readDtorLocs(Env& env) {
  400. for (auto i = size_t{0}; i < env.stateBefore.locals.size(); ++i) {
  401. if (couldRunDestructor(env.stateBefore.locals[i])) {
  402. addGen(env, i);
  403. }
  404. }
  405. }
  406. //////////////////////////////////////////////////////////////////////
  407. /*
  408. * Note that the instructions with popConds are relying on the consumer of the
  409. * values they push to check whether lifetime changes can have side-effects.
  410. *
  411. * For example, in bytecode like this, assuming $x is an object with a
  412. * destructor:
  413. *
  414. * CGetL $x
  415. * UnsetL $x
  416. * // ...
  417. * PopC $x // dtor should be here.
  418. *
  419. * The PopC will decide it can't be eliminated, which prevents us from
  420. * eliminating the CGetL.
  421. */
  422. void dce(Env& env, const bc::PopC&) { discardNonDtors(env); }
  423. // For PopV and PopR currently we never know if can't run a
  424. // destructor.
  425. void dce(Env& env, const bc::PopA&) { discard(env); }
  426. void dce(Env& env, const bc::Int&) { pushRemovable(env); }
  427. void dce(Env& env, const bc::String&) { pushRemovable(env); }
  428. void dce(Env& env, const bc::Array&) { pushRemovable(env); }
  429. void dce(Env& env, const bc::Double&) { pushRemovable(env); }
  430. void dce(Env& env, const bc::True&) { pushRemovable(env); }
  431. void dce(Env& env, const bc::False&) { pushRemovable(env); }
  432. void dce(Env& env, const bc::Null&) { pushRemovable(env); }
  433. void dce(Env& env, const bc::NullUninit&) { pushRemovable(env); }
  434. void dce(Env& env, const bc::File&) { pushRemovable(env); }
  435. void dce(Env& env, const bc::Dir&) { pushRemovable(env); }
  436. void dce(Env& env, const bc::NameA&) { popCond(env, push(env)); }
  437. void dce(Env& env, const bc::NewArray&) { pushRemovable(env); }
  438. void dce(Env& env, const bc::NewCol&) { pushRemovable(env); }
  439. void dce(Env& env, const bc::AGetC&) { popCond(env, push(env)); }
  440. void dce(Env& env, const bc::Dup&) {
  441. auto const u1 = push(env);
  442. auto const u2 = push(env);
  443. // Dup pushes a cell that is guaranteed to be not the last reference.
  444. // So, it can be eliminated if the cell it pushes is used as either
  445. // Use::Not or Use::UsedIfLastRef.
  446. // The cell it pops can be marked Use::Not only if Dup itself
  447. // can be eliminated.
  448. switch (u1.first) {
  449. case Use::Not:
  450. case Use::UsedIfLastRef:
  451. // It is ok to eliminate the Dup even if its second output u2
  452. // is used, because eliminating the Dup still leaves the second
  453. // output u2 on stack.
  454. markSetDead(env, u1.second);
  455. switch (u2.first) {
  456. case Use::Not:
  457. pop(env, Use::Not, u2.second);
  458. break;
  459. case Use::Used:
  460. case Use::UsedIfLastRef:
  461. pop(env, Use::Used, InstrIdSet{});
  462. break;
  463. }
  464. break;
  465. case Use::Used:
  466. pop(env, Use::Used, InstrIdSet{});
  467. break;
  468. }
  469. }
  470. void dce(Env& env, const bc::CGetL& op) {
  471. auto const ty = locRaw(env, op.loc1);
  472. addGen(env, op.loc1->id);
  473. if (readCouldHaveSideEffects(ty)) {
  474. push(env);
  475. } else {
  476. pushRemovable(env);
  477. }
  478. }
  479. void dce(Env& env, const bc::CGetL2& op) {
  480. auto const ty = locRaw(env, op.loc1);
  481. addGen(env, op.loc1->id);
  482. auto const u1 = push(env);
  483. auto const u2 = push(env);
  484. if (readCouldHaveSideEffects(ty)) {
  485. pop(env);
  486. } else {
  487. popCond(env, u1, u2);
  488. }
  489. }
  490. void dce(Env& env, const bc::CGetL3& op) {
  491. auto const ty = locRaw(env, op.loc1);
  492. addGen(env, op.loc1->id);
  493. auto const u1 = push(env);
  494. auto const u2 = push(env);
  495. auto const u3 = push(env);
  496. if (readCouldHaveSideEffects(ty)) {
  497. pop(env);
  498. pop(env);
  499. } else {
  500. popCond(env, u1, u2, u3);
  501. popCond(env, u1, u2, u3);
  502. }
  503. }
  504. void dce(Env& env, const bc::RetC&) { pop(env); readDtorLocs(env); }
  505. void dce(Env& env, const bc::Throw&) { pop(env); readDtorLocs(env); }
  506. void dce(Env& env, const bc::Fatal&) { pop(env); readDtorLocs(env); }
  507. void dce(Env& env, const bc::Exit&) { push(env); pop(env); readDtorLocs(env); }
  508. void dce(Env& env, const bc::SetL& op) {
  509. auto const oldTy = locRaw(env, op.loc1);
  510. auto const effects = setCouldHaveSideEffects(oldTy);
  511. if (!isLive(env, op.loc1->id) && !effects) return markDead(env);
  512. push(env);
  513. pop(env);
  514. if (!effects) addKill(env, op.loc1->id);
  515. if (effects) addGen(env, op.loc1->id);
  516. }
  517. /*
  518. * IncDecL is a read-modify-write: can be removed if the local isn't live, the
  519. * set can't have side effects, and no one reads the value it pushes. If the
  520. * instruction is not dead, always add the local to the set of upward exposed
  521. * uses.
  522. */
  523. void dce(Env& env, const bc::IncDecL& op) {
  524. auto const oldTy = locRaw(env, op.loc1);
  525. auto const effects = setCouldHaveSideEffects(oldTy) ||
  526. readCouldHaveSideEffects(oldTy);
  527. auto const u1 = push(env);
  528. if (!isLive(env, op.loc1->id) && !effects && allUnused(u1)) {
  529. return markSetDead(env, u1.second);
  530. }
  531. addGen(env, op.loc1->id);
  532. }
  533. /*
  534. * SetOpL is like IncDecL, but with the complication that we don't know if we
  535. * can mark it dead when visiting it, because it is going to pop an input but
  536. * unlike SetL doesn't push the value it popped. For the current scheme we
  537. * just add the local to gen even if we're doing a removable push, which is
  538. * correct but could definitely fail to eliminate some earlier stores.
  539. */
  540. void dce(Env& env, const bc::SetOpL& op) {
  541. auto const oldTy = locRaw(env, op.loc1);
  542. auto const effects = setCouldHaveSideEffects(oldTy) ||
  543. readCouldHaveSideEffects(oldTy);
  544. if (!isLive(env, op.loc1->id) && !effects) {
  545. popCond(env, push(env));
  546. } else {
  547. push(env);
  548. pop(env);
  549. }
  550. addGen(env, op.loc1->id);
  551. }
  552. /*
  553. * Default implementation is conservative: assume we use all of our
  554. * inputs, and can't be removed even if our output is unused.
  555. *
  556. * We also assume all the locals in the mayReadLocalSet must be
  557. * added to the live local set, and don't remove anything from it.
  558. */
  559. template<class Op>
  560. void dce(Env& env, const Op& op) {
  561. addGenSet(env, env.flags.mayReadLocalSet);
  562. env.dceState.liveLocals |= env.flags.mayReadLocalSet;
  563. for (auto i = uint32_t{0}; i < op.numPush(); ++i) {
  564. push(env);
  565. }
  566. for (auto i = uint32_t{0}; i < op.numPop(); ++i) {
  567. pop(env, Use::Used, InstrIdSet{});
  568. }
  569. }
  570. void dispatch_dce(Env& env, const Bytecode& op) {
  571. #define O(opcode, ...) case Op::opcode: dce(env, op.opcode); return;
  572. switch (op.op) { OPCODES }
  573. #undef O
  574. not_reached();
  575. }
  576. //////////////////////////////////////////////////////////////////////
  577. folly::Optional<DceState>
  578. dce_visit(const Index& index,
  579. Context const ctx,
  580. borrowed_ptr<const php::Block> const blk,
  581. const State& stateIn,
  582. std::bitset<kMaxTrackedLocals> liveOut,
  583. std::bitset<kMaxTrackedLocals> liveOutExn) {
  584. if (!stateIn.initialized) {
  585. /*
  586. * Skip unreachable blocks.
  587. *
  588. * For DCE analysis it is ok to assume the transfer function is
  589. * the identity on unreachable blocks (i.e. gen and kill sets are
  590. * empty). For optimize, we don't need to bother doing anything
  591. * to these---another pass is responsible for removing completely
  592. * unreachable blocks.
  593. */
  594. return folly::none;
  595. }
  596. auto const states = locally_propagated_states(index, ctx, blk, stateIn);
  597. auto dceState = DceState{};
  598. dceState.func = ctx.func;
  599. dceState.markedDead.resize(blk->hhbcs.size());
  600. dceState.liveLocals = liveOut;
  601. dceState.stack.resize(states.back().first.stack.size());
  602. dceState.usedLocals = liveOut;
  603. for (auto& s : dceState.stack) {
  604. s = UseInfo { Use::Used, InstrIdSet{} };
  605. }
  606. for (auto idx = blk->hhbcs.size(); idx-- > 0;) {
  607. auto const& op = blk->hhbcs[idx];
  608. FTRACE(2, " == #{} {}\n", idx, show(op));
  609. auto visit_env = Env {
  610. dceState,
  611. idx,
  612. states[idx].first,
  613. states[idx].second
  614. };
  615. dispatch_dce(visit_env, op);
  616. /*
  617. * When we see a PEI, we need to start over on the killBeforePEI
  618. * set, and the local-liveness must take into account the fact
  619. * that we could take an exception edge here (or'ing in the
  620. * liveOutExn set).
  621. */
  622. if (states[idx].second.wasPEI) {
  623. FTRACE(2, " <-- exceptions\n");
  624. dceState.liveLocals |= liveOutExn;
  625. dceState.killBeforePEI.reset();
  626. }
  627. dceState.usedLocals |= dceState.liveLocals;
  628. FTRACE(4, " dce stack: {}\n",
  629. [&] {
  630. using namespace folly::gen;
  631. return from(dceState.stack)
  632. | map([&] (const UseInfo& ui) { return show(ui); })
  633. | unsplit<std::string>(" ");
  634. }()
  635. );
  636. FTRACE(4, " interp stack: {}\n",
  637. [&] {
  638. using namespace folly::gen;
  639. return from(states[idx].first.stack)
  640. | map([&] (const Type& t) { return show(t); })
  641. | unsplit<std::string>(" ");
  642. }()
  643. );
  644. // We're now at the state before this instruction, so the stack
  645. // sizes must line up.
  646. assert(dceState.stack.size() == states[idx].first.stack.size());
  647. }
  648. return dceState;
  649. }
  650. struct DceAnalysis {
  651. std::bitset<kMaxTrackedLocals> gen;
  652. std::bitset<kMaxTrackedLocals> kill;
  653. std::bitset<kMaxTrackedLocals> killExn;
  654. };
  655. DceAnalysis analyze_dce(const Index& index,
  656. Context const ctx,
  657. borrowed_ptr<php::Block> const blk,
  658. const State& stateIn) {
  659. // During this analysis pass, we have to assume everything could be
  660. // live out, so we set allLive here. (Later we'll determine the
  661. // real liveOut sets.)
  662. auto allLive = std::bitset<kMaxTrackedLocals>{};
  663. allLive.set();
  664. if (auto dceState = dce_visit(index, ctx, blk, stateIn, allLive, allLive)) {
  665. return DceAnalysis {
  666. dceState->gen,
  667. dceState->kill,
  668. dceState->killBeforePEI
  669. };
  670. }
  671. return DceAnalysis {};
  672. }
  673. std::bitset<kMaxTrackedLocals>
  674. optimize_dce(const Index& index,
  675. Context const ctx,
  676. borrowed_ptr<php::Block> const blk,
  677. const State& stateIn,
  678. std::bitset<kMaxTrackedLocals> liveOut,
  679. std::bitset<kMaxTrackedLocals> liveOutExn) {
  680. auto const dceState = dce_visit(index, ctx, blk,
  681. stateIn, liveOut, liveOutExn);
  682. if (!dceState) return std::bitset<kMaxTrackedLocals>{};
  683. // Remove all instructions that were marked dead, and replace
  684. // instructions that can be replaced with pops but aren't dead.
  685. for (auto idx = blk->hhbcs.size(); idx-- > 0;) {
  686. if (!dceState->markedDead.test(idx)) continue;
  687. blk->hhbcs.erase(begin(blk->hhbcs) + idx);
  688. }
  689. // Blocks must be non-empty. Make sure we don't change that.
  690. if (blk->hhbcs.empty()) {
  691. blk->hhbcs.push_back(bc::Nop {});
  692. }
  693. return dceState->usedLocals;
  694. }
  695. //////////////////////////////////////////////////////////////////////
  696. void remove_unused_locals(Context const ctx,
  697. std::bitset<kMaxTrackedLocals> usedLocals) {
  698. if (!options.RemoveUnusedLocals) return;
  699. auto const func = ctx.func;
  700. /*
  701. * Removing unused locals in closures requires checking which ones
  702. * are captured variables so we can remove the relevant properties,
  703. * and then we'd have to mutate the CreateCl callsite, so we don't
  704. * bother for now.
  705. *
  706. * Note: many closure bodies have unused $this local, because of
  707. * some emitter quirk, so this might be worthwhile.
  708. */
  709. if (func->isClosureBody) return;
  710. func->locals.erase(
  711. std::remove_if(
  712. begin(func->locals) + func->params.size(),
  713. end(func->locals),
  714. [&] (const std::unique_ptr<php::Local>& l) {
  715. if (l->id < kMaxTrackedLocals && !usedLocals.test(l->id)) {
  716. FTRACE(2, " removing: {}\n", local_string(borrow(l)));
  717. return true;
  718. }
  719. return false;
  720. }
  721. ),
  722. end(func->locals)
  723. );
  724. // Fixup local ids, in case we removed any.
  725. for (auto i = uint32_t{0}; i < func->locals.size(); ++i) {
  726. func->locals[i]->id = i;
  727. }
  728. }
  729. //////////////////////////////////////////////////////////////////////
  730. }
  731. void local_dce(const Index& index,
  732. const FuncAnalysis& ainfo,
  733. borrowed_ptr<php::Block> const blk,
  734. const State& stateIn) {
  735. Trace::Bump bumper{Trace::hhbbc_dce, kSystemLibBump,
  736. is_systemlib_part(*ainfo.ctx.unit)};
  737. // For local DCE, we have to assume all variables are in the
  738. // live-out set for the block.
  739. auto allLive = std::bitset<kMaxTrackedLocals>();
  740. allLive.set();
  741. optimize_dce(index, ainfo.ctx, blk, stateIn, allLive, allLive);
  742. }
  743. //////////////////////////////////////////////////////////////////////
  744. void global_dce(const Index& index, const FuncAnalysis& ai) {
  745. Trace::Bump bumper{Trace::hhbbc_dce, kSystemLibBump,
  746. is_systemlib_part(*ai.ctx.unit)};
  747. auto rpoId = [&] (borrowed_ptr<php::Block> blk) {
  748. return ai.bdata[blk->id].rpoId;
  749. };
  750. FTRACE(1, "|---- global DCE analyze ({})\n", show(ai.ctx));
  751. FTRACE(2, "{}", [&] {
  752. using namespace folly::gen;
  753. auto i = uint32_t{0};
  754. return from(ai.ctx.func->locals)
  755. | mapped(
  756. [&] (const std::unique_ptr<php::Local>& l) {
  757. return folly::sformat(" {} {}\n", i++, local_string(borrow(l)));
  758. })
  759. | unsplit<std::string>("");
  760. }());
  761. /*
  762. * Create a DceAnalysis for each block, indexed by rpo id.
  763. *
  764. * Here we want to pre-compute the transfer function for each block,
  765. * so we don't need to visit each instruction repeatedly during the
  766. * fixed point computation. The transfer function is a function of
  767. * the set of locals read and killed in the block, and does not
  768. * depend on the final live out state, so we can compute it here.
  769. */
  770. auto blockAnalysis = std::vector<DceAnalysis>{};
  771. for (auto& b : ai.rpoBlocks) {
  772. FTRACE(2, "block #{}\n", b->id);
  773. auto const dinfo = analyze_dce(
  774. index,
  775. ai.ctx,
  776. b,
  777. ai.bdata[b->id].stateIn
  778. );
  779. blockAnalysis.push_back(dinfo);
  780. }
  781. /*
  782. * States for each block, indexed by RPO id.
  783. *
  784. * The liveOut state is the union of liveIn states of each normal
  785. * successor, and liveOutExn is the union of liveIn states of each
  786. * exceptional successor.
  787. */
  788. struct BlockState {
  789. std::bitset<kMaxTrackedLocals> liveOut;
  790. std::bitset<kMaxTrackedLocals> liveOutExn;
  791. };
  792. std::vector<BlockState> blockStates(ai.rpoBlocks.size());
  793. /*
  794. * Set of block reverse post order ids that still need to be
  795. * visited. This is ordered by std::greater, so we'll generally
  796. * visit blocks before their predecessors. (The algorithm doesn't
  797. * need this for correctness, but it might make it iterate less.)
  798. *
  799. * Every block must be visited at least once, so we throw them all
  800. * in to start.
  801. */
  802. auto incompleteQ = dataflow_worklist<uint32_t,std::less<uint32_t>>(
  803. ai.rpoBlocks.size()
  804. );
  805. for (auto& b : ai.rpoBlocks) incompleteQ.push(rpoId(b));
  806. auto const normalPreds = computeNormalPreds(ai.rpoBlocks);
  807. auto const factoredPreds = computeFactoredPreds(ai.rpoBlocks);
  808. /*
  809. * Iterate on live out states until we reach a fixed point.
  810. *
  811. * This algorithm treats the exceptional live-out states differently
  812. * from the live-out states during normal control flow. The
  813. * liveOutExn sets only take part in the liveIn computation when the
  814. * block has factored exits.
  815. */
  816. while (!incompleteQ.empty()) {
  817. auto const blk = ai.rpoBlocks[incompleteQ.pop()];
  818. FTRACE(2, "block #{}\n", blk->id);
  819. auto const liveOut = blockStates[rpoId(blk)].liveOut;
  820. auto const liveOutExn = blockStates[rpoId(blk)].liveOutExn;
  821. auto const transfer = blockAnalysis[rpoId(blk)];
  822. auto const liveIn = transfer.gen | (liveOut & ~transfer.kill)
  823. | (liveOutExn & ~transfer.killExn);
  824. FTRACE(2, "live out : {}\n"
  825. "out exn : {}\n"
  826. "gen : {}\n"
  827. "kill : {}\n"
  828. "kill exn : {}\n"
  829. "live in : {}\n",
  830. bits_string(ai.ctx.func, liveOut),
  831. bits_string(ai.ctx.func, liveOutExn),
  832. bits_string(ai.ctx.func, transfer.gen),
  833. bits_string(ai.ctx.func, transfer.kill),
  834. bits_string(ai.ctx.func, transfer.killExn),
  835. bits_string(ai.ctx.func, liveIn));
  836. // Merge the liveIn into the liveOut of each normal predecessor.
  837. // If the set changes, reschedule that predecessor.
  838. for (auto& pred : normalPreds[blk->id]) {
  839. FTRACE(2, " -> {}\n", pred->id);
  840. auto& predState = blockStates[rpoId(pred)].liveOut;
  841. auto const oldPredState = predState;
  842. predState |= liveIn;
  843. if (predState != oldPredState) {
  844. incompleteQ.push(rpoId(pred));
  845. }
  846. }
  847. // Merge the liveIn into the liveOutExn state for each exceptional
  848. // precessor. The liveIn computation also depends on the
  849. // liveOutExn state, so again reschedule if it changes.
  850. for (auto& pred : factoredPreds[blk->id]) {
  851. FTRACE(2, " => {}\n", pred->id);
  852. auto& predState = blockStates[rpoId(pred)].liveOutExn;
  853. auto const oldPredState = predState;
  854. predState |= liveIn;
  855. if (predState != oldPredState) {
  856. incompleteQ.push(rpoId(pred));
  857. }
  858. }
  859. }
  860. /*
  861. * Now that we're at a fixed point, use the propagated states to
  862. * remove instructions that don't need to be there.
  863. */
  864. FTRACE(1, "|---- global DCE optimize ({})\n", show(ai.ctx));
  865. std::bitset<kMaxTrackedLocals> usedLocals;
  866. for (auto& b : ai.rpoBlocks) {
  867. FTRACE(2, "block #{}\n", b->id);
  868. usedLocals |= optimize_dce(
  869. index,
  870. ai.ctx,
  871. b,
  872. ai.bdata[b->id].stateIn,
  873. blockStates[rpoId(b)].liveOut,
  874. blockStates[rpoId(b)].liveOutExn
  875. );
  876. }
  877. FTRACE(1, " used locals: {}\n", bits_string(ai.ctx.func, usedLocals));
  878. remove_unused_locals(ai.ctx, usedLocals);
  879. }
  880. //////////////////////////////////////////////////////////////////////
  881. void remove_unreachable_blocks(const Index& index, const FuncAnalysis& ainfo) {
  882. boost::dynamic_bitset<> reachable(ainfo.ctx.func->nextBlockId);
  883. for (auto& blk : ainfo.rpoBlocks) {
  884. reachable[blk->id] = ainfo.bdata[blk->id].stateIn.initialized;
  885. if (reachable[blk->id]) continue;
  886. auto const srcLoc = blk->hhbcs.front().srcLoc;
  887. blk->hhbcs = {
  888. bc_with_loc(srcLoc, bc::String { s_unreachable.get() }),
  889. bc_with_loc(srcLoc, bc::Fatal { FatalOp::Runtime })
  890. };
  891. blk->fallthrough = nullptr;
  892. }
  893. if (!options.RemoveDeadBlocks) return;
  894. for (auto& blk : ainfo.rpoBlocks) {
  895. auto reachableTargets = false;
  896. forEachTakenEdge(blk->hhbcs.back(), [&] (php::Block& target) {
  897. if (reachable[target.id]) reachableTargets = true;
  898. });
  899. if (reachableTargets) continue;
  900. switch (blk->hhbcs.back().op) {
  901. case Op::JmpNZ:
  902. case Op::JmpZ:
  903. blk->hhbcs.back() = bc_with_loc(blk->hhbcs.back().srcLoc, bc::PopC {});
  904. break;
  905. default:
  906. break;
  907. }
  908. }
  909. }
  910. //////////////////////////////////////////////////////////////////////
  911. }}