PageRenderTime 48ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 0ms

/mordor/scheduler.cpp

https://github.com/mozy/mordor
C++ | 422 lines | 353 code | 32 blank | 37 comment | 117 complexity | 17524c910d76d47dca60ecf13d992888 MD5 | raw file
Possible License(s): BSD-3-Clause
  1. // Copyright (c) 2009 - Mozy, Inc.
  2. #include "scheduler.h"
  3. #include <boost/bind.hpp>
  4. #include "atomic.h"
  5. #include "assert.h"
  6. #include "fiber.h"
  7. namespace Mordor {
  8. static Logger::ptr g_log = Log::lookup("mordor:scheduler");
  9. ThreadLocalStorage<Scheduler *> Scheduler::t_scheduler;
  10. ThreadLocalStorage<Fiber *> Scheduler::t_fiber;
  11. Scheduler::Scheduler(size_t threads, bool useCaller, size_t batchSize)
  12. : m_activeThreadCount(0),
  13. m_idleThreadCount(0),
  14. m_stopping(true),
  15. m_autoStop(false),
  16. m_batchSize(batchSize)
  17. {
  18. MORDOR_ASSERT(threads >= 1);
  19. if (useCaller) {
  20. --threads;
  21. MORDOR_ASSERT(getThis() == NULL);
  22. t_scheduler = this;
  23. m_rootFiber.reset(new Fiber(boost::bind(&Scheduler::run, this)));
  24. t_scheduler = this;
  25. t_fiber = m_rootFiber.get();
  26. m_rootThread = gettid();
  27. } else {
  28. m_rootThread = emptytid();
  29. }
  30. m_threadCount = threads;
  31. }
  32. Scheduler::~Scheduler()
  33. {
  34. MORDOR_NOTHROW_ASSERT(m_stopping);
  35. if (getThis() == this) {
  36. t_scheduler = NULL;
  37. }
  38. }
  39. Scheduler *
  40. Scheduler::getThis()
  41. {
  42. return t_scheduler.get();
  43. }
  44. void
  45. Scheduler::start()
  46. {
  47. MORDOR_LOG_VERBOSE(g_log) << this << " starting " << m_threadCount << " threads";
  48. boost::mutex::scoped_lock lock(m_mutex);
  49. if (!m_stopping)
  50. return;
  51. // TODO: There may be a race condition here if one thread calls stop(),
  52. // and another thread calls start() before the worker threads for this
  53. // scheduler actually exit; they may resurrect themselves, and the stopping
  54. // thread would block waiting for the thread to exit
  55. m_stopping = false;
  56. MORDOR_ASSERT(m_threads.empty());
  57. m_threads.resize(m_threadCount);
  58. for (size_t i = 0; i < m_threadCount; ++i) {
  59. m_threads[i] = boost::shared_ptr<Thread>(new Thread(
  60. boost::bind(&Scheduler::run, this)));
  61. }
  62. }
  63. bool
  64. Scheduler::hasWorkToDo()
  65. {
  66. boost::mutex::scoped_lock lock(m_mutex);
  67. return !m_fibers.empty();
  68. }
/// Stop the scheduler: signal all workers to wind down, drain remaining
/// work, and join the spawned threads.  For a thread-hijacking scheduler
/// this must be called from within the scheduler itself so control can
/// return to the original thread.
void
Scheduler::stop()
{
    // Already stopped
    if (m_rootFiber &&
        m_threadCount == 0 &&
        (m_rootFiber->state() == Fiber::TERM || m_rootFiber->state() == Fiber::INIT)) {
        MORDOR_LOG_VERBOSE(g_log) << this << " stopped";
        m_stopping = true;
        // A derived class may inhibit stopping while it has things to do in
        // its idle loop, so we can't break early
        if (stopping())
            return;
    }
    bool exitOnThisFiber = false;
    if (m_rootThread != emptytid()) {
        // A thread-hijacking scheduler must be stopped
        // from within itself to return control to the
        // original thread
        MORDOR_ASSERT(Scheduler::getThis() == this);
        if (Fiber::getThis() == m_callingFiber) {
            exitOnThisFiber = true;
            // First switch to the correct thread
            MORDOR_LOG_DEBUG(g_log) << this
                << " switching to root thread to stop";
            switchTo(m_rootThread);
        }
        if (!m_callingFiber)
            exitOnThisFiber = true;
    } else {
        // A spawned-threads only scheduler cannot be stopped from within
        // itself... who would get control?
        MORDOR_ASSERT(Scheduler::getThis() != this);
    }
    m_stopping = true;
    // Wake each worker thread so it can observe m_stopping and exit.
    for (size_t i = 0; i < m_threadCount; ++i)
        tickle();
    // Also wake the root thread's run fiber when it isn't us.
    if (m_rootFiber && (m_threadCount != 0u || Scheduler::getThis() != this))
        tickle();
    // Wait for all work to stop on this thread
    if (exitOnThisFiber) {
        while (!stopping()) {
            // Give this thread's run fiber a chance to kill itself off
            MORDOR_LOG_DEBUG(g_log) << this
                << " yielding to this thread to stop";
            yieldTo(true);
        }
    }
    // Wait for other threads to stop
    if (exitOnThisFiber ||
        Scheduler::getThis() != this) {
        MORDOR_LOG_DEBUG(g_log) << this
            << " waiting for other threads to stop";
        std::vector<boost::shared_ptr<Thread> > threads;
        {
            // Swap the thread list out under the lock so the joins below
            // happen without holding m_mutex.
            boost::mutex::scoped_lock lock(m_mutex);
            threads.swap(m_threads);
        }
        for (std::vector<boost::shared_ptr<Thread> >::const_iterator it
            (threads.begin());
            it != threads.end();
            ++it) {
            (*it)->join();
        }
    }
    MORDOR_LOG_VERBOSE(g_log) << this << " stopped";
}
  136. bool
  137. Scheduler::stopping()
  138. {
  139. boost::mutex::scoped_lock lock(m_mutex);
  140. return m_stopping && m_fibers.empty() && m_activeThreadCount == 0;
  141. }
  142. void
  143. Scheduler::switchTo(tid_t thread)
  144. {
  145. MORDOR_ASSERT(Scheduler::getThis() != NULL);
  146. if (Scheduler::getThis() == this) {
  147. if (thread == emptytid() || thread == gettid())
  148. return;
  149. }
  150. MORDOR_LOG_DEBUG(g_log) << this << " switching to thread " << thread;
  151. schedule(Fiber::getThis(), thread);
  152. Scheduler::yieldTo();
  153. }
  154. void
  155. Scheduler::yieldTo()
  156. {
  157. Scheduler *self = Scheduler::getThis();
  158. MORDOR_ASSERT(self);
  159. MORDOR_LOG_DEBUG(g_log) << self << " yielding to scheduler";
  160. MORDOR_ASSERT(t_fiber.get());
  161. if (self->m_rootThread == gettid() &&
  162. (t_fiber->state() == Fiber::INIT || t_fiber->state() == Fiber::TERM)) {
  163. self->m_callingFiber = Fiber::getThis();
  164. self->yieldTo(true);
  165. } else {
  166. self->yieldTo(false);
  167. }
  168. }
  169. void
  170. Scheduler::yield()
  171. {
  172. MORDOR_ASSERT(Scheduler::getThis());
  173. Scheduler::getThis()->schedule(Fiber::getThis());
  174. yieldTo();
  175. }
  176. void
  177. Scheduler::dispatch()
  178. {
  179. MORDOR_LOG_DEBUG(g_log) << this << " dispatching";
  180. MORDOR_ASSERT(m_rootThread == gettid() && m_threadCount == 0);
  181. m_stopping = true;
  182. m_autoStop = true;
  183. yieldTo();
  184. m_autoStop = false;
  185. }
  186. void
  187. Scheduler::threadCount(size_t threads)
  188. {
  189. MORDOR_ASSERT(threads >= 1);
  190. if (m_rootFiber)
  191. --threads;
  192. boost::mutex::scoped_lock lock(m_mutex);
  193. if (threads == m_threadCount) {
  194. return;
  195. } else if (threads > m_threadCount) {
  196. m_threads.resize(threads);
  197. for (size_t i = m_threadCount; i < threads; ++i)
  198. m_threads[i] = boost::shared_ptr<Thread>(new Thread(
  199. boost::bind(&Scheduler::run, this)));
  200. }
  201. m_threadCount = threads;
  202. }
/// Transfer control from the current fiber to this thread's scheduler (run)
/// fiber.  When @p yieldToCallerOnTerminate is true the run fiber returns
/// to its caller on termination; that mode is only valid on the hijacked
/// root thread.
void
Scheduler::yieldTo(bool yieldToCallerOnTerminate)
{
    MORDOR_ASSERT(t_fiber.get());
    MORDOR_ASSERT(Scheduler::getThis() == this);
    if (yieldToCallerOnTerminate)
        MORDOR_ASSERT(m_rootThread == gettid());
    if (t_fiber->state() != Fiber::HOLD) {
        // The run fiber finished (or hasn't started); re-arm it with a
        // fresh run loop.  Also latch dispatch()'s auto-stop request into
        // m_stopping so that fresh loop can wind down.
        m_stopping = m_autoStop || m_stopping;
        // XXX: is t_fiber the hijacked thread ?
        t_fiber->reset(boost::bind(&Scheduler::run, this));
    }
    t_fiber->yieldTo(yieldToCallerOnTerminate);
}
/// Per-thread scheduler main loop: repeatedly pull a batch of queued
/// fibers/functors (up to m_batchSize) and execute them, running the idle
/// fiber when there is nothing to do.  Returns when the idle fiber
/// terminates, or when this thread is culled by a threadCount() decrease.
void
Scheduler::run()
{
    setThis();
    if (gettid() != m_rootThread) {
        // Running in own thread
        t_fiber = Fiber::getThis().get();
    } else {
        // Hijacked a thread
        MORDOR_ASSERT(t_fiber.get() == Fiber::getThis().get());
    }
    Fiber::ptr idleFiber(new Fiber(boost::bind(&Scheduler::idle, this)));
    MORDOR_LOG_VERBOSE(g_log) << this << " starting thread with idle fiber " << idleFiber;
    // Reusable fiber for executing plain functors (dg) without allocating
    // a new Fiber per item.
    Fiber::ptr dgFiber;
    // use a deque for O(1) .size() and pop_front()
    std::deque<FiberAndThread> batch;
    // True while this thread is counted in m_activeThreadCount.
    bool isActive = false;
    while (true) {
        MORDOR_ASSERT(batch.empty());
        bool dontIdle = false;
        bool tickleMe = false;
        {
            boost::mutex::scoped_lock lock(m_mutex);
            // Kill ourselves off if needed
            if (m_threads.size() > m_threadCount && gettid() != m_rootThread) {
                // Accounting
                if (isActive)
                    --m_activeThreadCount;
                // Kill off the idle fiber
                try {
                    throw boost::enable_current_exception(
                        OperationAbortedException());
                } catch(...) {
                    idleFiber->inject(boost::current_exception());
                }
                // Detach our thread
                for (std::vector<boost::shared_ptr<Thread> >
                    ::iterator it = m_threads.begin();
                    it != m_threads.end();
                    ++it)
                    if ((*it)->tid() == gettid()) {
                        m_threads.erase(it);
                        // Still over the target count; wake another
                        // thread so it can cull itself too.
                        if (m_threads.size() > m_threadCount)
                            tickle();
                        return;
                    }
                MORDOR_NOTREACHED();
            }
            // Scan the queue, collecting up to m_batchSize runnable items.
            std::list<FiberAndThread>::iterator it(m_fibers.begin());
            while (it != m_fibers.end()) {
                // If we've met our batch size, and we're not checking to see
                // if we need to tickle another thread, then break
                if ( (tickleMe || m_activeThreadCount == threadCount()) &&
                    batch.size() == m_batchSize)
                    break;
                // Item pinned to a different thread; leave it in place.
                if (it->thread != emptytid() && it->thread != gettid()) {
                    MORDOR_LOG_DEBUG(g_log) << this
                        << " skipping item scheduled for thread "
                        << it->thread;
                    // Wake up another thread to hopefully service this
                    tickleMe = true;
                    dontIdle = true;
                    ++it;
                    continue;
                }
                MORDOR_ASSERT(it->fiber || it->dg);
                // This fiber is still executing; probably just some race
                // race condition that it needs to yield on one thread
                // before running on another thread
                if (it->fiber && it->fiber->state() == Fiber::EXEC) {
                    MORDOR_LOG_DEBUG(g_log) << this
                        << " skipping executing fiber " << it->fiber;
                    ++it;
                    dontIdle = true;
                    continue;
                }
                // We were just checking if there is more work; there is, so
                // set the flag and don't actually take this piece of work
                if (batch.size() == m_batchSize) {
                    tickleMe = true;
                    break;
                }
                batch.push_back(*it);
                it = m_fibers.erase(it);
                // First item claimed: count this thread as active.
                if (!isActive) {
                    ++m_activeThreadCount;
                    isActive = true;
                }
            }
            // Claimed nothing this pass; drop out of the active count.
            if (batch.empty() && isActive) {
                --m_activeThreadCount;
                isActive = false;
            }
        }
        // Tickle outside the lock.
        if (tickleMe)
            tickle();
        MORDOR_LOG_DEBUG(g_log) << this
            << " got " << batch.size() << " fiber/dgs to process (max: "
            << m_batchSize << ", active: " << isActive << ")";
        MORDOR_ASSERT(isActive == !batch.empty());
        if (batch.empty()) {
            // Skip idling when items remain that another thread must run.
            if (dontIdle)
                continue;
            if (idleFiber->state() == Fiber::TERM) {
                MORDOR_LOG_DEBUG(g_log) << this << " idle fiber terminated";
                if (gettid() == m_rootThread)
                    m_callingFiber.reset();
                // Unblock the next thread
                if (threadCount() > 1)
                    tickle();
                return;
            }
            MORDOR_LOG_DEBUG(g_log) << this << " idling";
            atomicIncrement(m_idleThreadCount);
            idleFiber->call();
            atomicDecrement(m_idleThreadCount);
            continue;
        }
        // Execute the claimed batch in order.
        while (!batch.empty()) {
            FiberAndThread& ft = batch.front();
            Fiber::ptr f = ft.fiber;
            boost::function<void ()> dg = ft.dg;
            batch.pop_front();
            try {
                if (f && f->state() != Fiber::TERM) {
                    MORDOR_LOG_DEBUG(g_log) << this << " running " << f;
                    f->yieldTo();
                } else if (dg) {
                    // Wrap the functor in the reusable dgFiber (create it
                    // on first use).
                    if (dgFiber)
                        dgFiber->reset(dg);
                    else
                        dgFiber.reset(new Fiber(dg));
                    MORDOR_LOG_DEBUG(g_log) << this << " running " << dg;
                    dg = NULL;
                    dgFiber->yieldTo();
                    // Only reuse the fiber if the functor ran to
                    // completion; otherwise it still owns that fiber.
                    if (dgFiber->state() != Fiber::TERM)
                        dgFiber.reset();
                    else
                        dgFiber->reset(NULL);
                }
            } catch (...) {
                try {
                    MORDOR_LOG_FATAL(Log::root())
                        << boost::current_exception_diagnostic_information();
                }
                catch(...) {
                    // Swallow any exceptions that might occur while trying to log the current fiber state #98680
                }
                {
                    boost::mutex::scoped_lock lock(m_mutex);
                    // push all un-executed fibers back to m_fibers
                    copy(batch.begin(), batch.end(), back_inserter(m_fibers));
                    batch.clear();
                    // decrease the activeCount as this thread is in exception
                    isActive = false;
                    --m_activeThreadCount;
                }
                throw;
            }
        }
    }
}
  379. SchedulerSwitcher::SchedulerSwitcher(Scheduler *target)
  380. {
  381. m_caller = Scheduler::getThis();
  382. if (target)
  383. target->switchTo();
  384. }
  385. SchedulerSwitcher::~SchedulerSwitcher()
  386. {
  387. if (m_caller)
  388. m_caller->switchTo();
  389. }
  390. }