PageRenderTime 166ms CodeModel.GetById 60ms app.highlight 69ms RepoModel.GetById 32ms app.codeStats 0ms

/mordor/scheduler.cpp

http://github.com/mozy/mordor
C++ | 422 lines | 353 code | 32 blank | 37 comment | 117 complexity | 17524c910d76d47dca60ecf13d992888 MD5 | raw file
  1// Copyright (c) 2009 - Mozy, Inc.
  2
  3#include "scheduler.h"
  4
  5#include <boost/bind.hpp>
  6
  7#include "atomic.h"
  8#include "assert.h"
  9#include "fiber.h"
 10
 11namespace Mordor {
 12
// Logger used by this file, looked up under the name "mordor:scheduler"
static Logger::ptr g_log = Log::lookup("mordor:scheduler");

// Scheduler bound to the calling thread; this is what getThis() returns
ThreadLocalStorage<Scheduler *> Scheduler::t_scheduler;
// Fiber on which Scheduler::run() executes for the calling thread (set by
// the constructor for a hijacked thread, and by run() for a spawned thread)
ThreadLocalStorage<Fiber *> Scheduler::t_fiber;
 17
 18Scheduler::Scheduler(size_t threads, bool useCaller, size_t batchSize)
 19    : m_activeThreadCount(0),
 20      m_idleThreadCount(0),
 21      m_stopping(true),
 22      m_autoStop(false),
 23      m_batchSize(batchSize)
 24{
 25    MORDOR_ASSERT(threads >= 1);
 26    if (useCaller) {
 27        --threads;
 28        MORDOR_ASSERT(getThis() == NULL);
 29        t_scheduler = this;
 30        m_rootFiber.reset(new Fiber(boost::bind(&Scheduler::run, this)));
 31        t_scheduler = this;
 32        t_fiber = m_rootFiber.get();
 33        m_rootThread = gettid();
 34    } else {
 35        m_rootThread = emptytid();
 36    }
 37    m_threadCount = threads;
 38}
 39
 40Scheduler::~Scheduler()
 41{
 42    MORDOR_NOTHROW_ASSERT(m_stopping);
 43    if (getThis() == this) {
 44        t_scheduler = NULL;
 45    }
 46}
 47
 48Scheduler *
 49Scheduler::getThis()
 50{
 51    return t_scheduler.get();
 52}
 53
 54void
 55Scheduler::start()
 56{
 57    MORDOR_LOG_VERBOSE(g_log) << this << " starting " << m_threadCount << " threads";
 58    boost::mutex::scoped_lock lock(m_mutex);
 59    if (!m_stopping)
 60        return;
 61    // TODO: There may be a race condition here if one thread calls stop(),
 62    // and another thread calls start() before the worker threads for this
 63    // scheduler actually exit; they may resurrect themselves, and the stopping
 64    // thread would block waiting for the thread to exit
 65
 66    m_stopping = false;
 67    MORDOR_ASSERT(m_threads.empty());
 68    m_threads.resize(m_threadCount);
 69    for (size_t i = 0; i < m_threadCount; ++i) {
 70        m_threads[i] = boost::shared_ptr<Thread>(new Thread(
 71            boost::bind(&Scheduler::run, this)));
 72    }
 73}
 74
 75bool
 76Scheduler::hasWorkToDo()
 77{
 78    boost::mutex::scoped_lock lock(m_mutex);
 79    return !m_fibers.empty();
 80}
 81
 82void
 83Scheduler::stop()
 84{
 85    // Already stopped
 86    if (m_rootFiber &&
 87        m_threadCount == 0 &&
 88        (m_rootFiber->state() == Fiber::TERM || m_rootFiber->state() == Fiber::INIT)) {
 89        MORDOR_LOG_VERBOSE(g_log) << this << " stopped";
 90        m_stopping = true;
 91        // A derived class may inhibit stopping while it has things to do in
 92        // its idle loop, so we can't break early
 93        if (stopping())
 94            return;
 95    }
 96
 97    bool exitOnThisFiber = false;
 98    if (m_rootThread != emptytid()) {
 99        // A thread-hijacking scheduler must be stopped
100        // from within itself to return control to the
101        // original thread
102        MORDOR_ASSERT(Scheduler::getThis() == this);
103        if (Fiber::getThis() == m_callingFiber) {
104            exitOnThisFiber = true;
105            // First switch to the correct thread
106            MORDOR_LOG_DEBUG(g_log) << this
107                << " switching to root thread to stop";
108            switchTo(m_rootThread);
109        }
110        if (!m_callingFiber)
111            exitOnThisFiber = true;
112    } else {
113        // A spawned-threads only scheduler cannot be stopped from within
114        // itself... who would get control?
115        MORDOR_ASSERT(Scheduler::getThis() != this);
116    }
117    m_stopping = true;
118    for (size_t i = 0; i < m_threadCount; ++i)
119        tickle();
120    if (m_rootFiber && (m_threadCount != 0u || Scheduler::getThis() != this))
121        tickle();
122    // Wait for all work to stop on this thread
123    if (exitOnThisFiber) {
124        while (!stopping()) {
125            // Give this thread's run fiber a chance to kill itself off
126            MORDOR_LOG_DEBUG(g_log) << this
127                << " yielding to this thread to stop";
128            yieldTo(true);
129        }
130    }
131    // Wait for other threads to stop
132    if (exitOnThisFiber ||
133        Scheduler::getThis() != this) {
134        MORDOR_LOG_DEBUG(g_log) << this
135            << " waiting for other threads to stop";
136        std::vector<boost::shared_ptr<Thread> > threads;
137        {
138            boost::mutex::scoped_lock lock(m_mutex);
139            threads.swap(m_threads);
140        }
141        for (std::vector<boost::shared_ptr<Thread> >::const_iterator it
142            (threads.begin());
143            it != threads.end();
144            ++it) {
145            (*it)->join();
146        }
147    }
148    MORDOR_LOG_VERBOSE(g_log) << this << " stopped";
149}
150
151bool
152Scheduler::stopping()
153{
154    boost::mutex::scoped_lock lock(m_mutex);
155    return m_stopping && m_fibers.empty() && m_activeThreadCount == 0;
156}
157
158void
159Scheduler::switchTo(tid_t thread)
160{
161    MORDOR_ASSERT(Scheduler::getThis() != NULL);
162    if (Scheduler::getThis() == this) {
163        if (thread == emptytid() || thread == gettid())
164            return;
165    }
166    MORDOR_LOG_DEBUG(g_log) << this << " switching to thread " << thread;
167    schedule(Fiber::getThis(), thread);
168    Scheduler::yieldTo();
169}
170
171void
172Scheduler::yieldTo()
173{
174    Scheduler *self = Scheduler::getThis();
175    MORDOR_ASSERT(self);
176    MORDOR_LOG_DEBUG(g_log) << self << " yielding to scheduler";
177    MORDOR_ASSERT(t_fiber.get());
178    if (self->m_rootThread == gettid() &&
179        (t_fiber->state() == Fiber::INIT || t_fiber->state() == Fiber::TERM)) {
180        self->m_callingFiber = Fiber::getThis();
181        self->yieldTo(true);
182    } else {
183        self->yieldTo(false);
184    }
185}
186
187void
188Scheduler::yield()
189{
190    MORDOR_ASSERT(Scheduler::getThis());
191    Scheduler::getThis()->schedule(Fiber::getThis());
192    yieldTo();
193}
194
195void
196Scheduler::dispatch()
197{
198    MORDOR_LOG_DEBUG(g_log) << this << " dispatching";
199    MORDOR_ASSERT(m_rootThread == gettid() && m_threadCount == 0);
200    m_stopping = true;
201    m_autoStop = true;
202    yieldTo();
203    m_autoStop = false;
204}
205
206void
207Scheduler::threadCount(size_t threads)
208{
209    MORDOR_ASSERT(threads >= 1);
210    if (m_rootFiber)
211        --threads;
212    boost::mutex::scoped_lock lock(m_mutex);
213    if (threads == m_threadCount) {
214        return;
215    } else if (threads > m_threadCount) {
216        m_threads.resize(threads);
217        for (size_t i = m_threadCount; i < threads; ++i)
218            m_threads[i] = boost::shared_ptr<Thread>(new Thread(
219            boost::bind(&Scheduler::run, this)));
220    }
221    m_threadCount = threads;
222}
223
224void
225Scheduler::yieldTo(bool yieldToCallerOnTerminate)
226{
227    MORDOR_ASSERT(t_fiber.get());
228    MORDOR_ASSERT(Scheduler::getThis() == this);
229    if (yieldToCallerOnTerminate)
230        MORDOR_ASSERT(m_rootThread == gettid());
231    if (t_fiber->state() != Fiber::HOLD) {
232        m_stopping = m_autoStop || m_stopping;
233        // XXX: is t_fiber the hijacked thread ?
234        t_fiber->reset(boost::bind(&Scheduler::run, this));
235    }
236    t_fiber->yieldTo(yieldToCallerOnTerminate);
237}
238
239void
240Scheduler::run()
241{
242    setThis();
243    if (gettid() != m_rootThread) {
244        // Running in own thread
245        t_fiber = Fiber::getThis().get();
246    } else {
247        // Hijacked a thread
248        MORDOR_ASSERT(t_fiber.get() == Fiber::getThis().get());
249    }
250    Fiber::ptr idleFiber(new Fiber(boost::bind(&Scheduler::idle, this)));
251    MORDOR_LOG_VERBOSE(g_log) << this << " starting thread with idle fiber " << idleFiber;
252    Fiber::ptr dgFiber;
253    // use a deque for O(1) .size() and pop_front()
254    std::deque<FiberAndThread> batch;
255    bool isActive = false;
256    while (true) {
257        MORDOR_ASSERT(batch.empty());
258        bool dontIdle = false;
259        bool tickleMe = false;
260        {
261            boost::mutex::scoped_lock lock(m_mutex);
262            // Kill ourselves off if needed
263            if (m_threads.size() > m_threadCount && gettid() != m_rootThread) {
264                // Accounting
265                if (isActive)
266                    --m_activeThreadCount;
267                // Kill off the idle fiber
268                try {
269                    throw boost::enable_current_exception(
270                        OperationAbortedException());
271                } catch(...) {
272                    idleFiber->inject(boost::current_exception());
273                }
274                // Detach our thread
275                for (std::vector<boost::shared_ptr<Thread> >
276                    ::iterator it = m_threads.begin();
277                    it != m_threads.end();
278                    ++it)
279                    if ((*it)->tid() == gettid()) {
280                        m_threads.erase(it);
281                        if (m_threads.size() > m_threadCount)
282                            tickle();
283                        return;
284                    }
285                MORDOR_NOTREACHED();
286            }
287
288            std::list<FiberAndThread>::iterator it(m_fibers.begin());
289            while (it != m_fibers.end()) {
290                // If we've met our batch size, and we're not checking to see
291                // if we need to tickle another thread, then break
292                if ( (tickleMe || m_activeThreadCount == threadCount()) &&
293                    batch.size() == m_batchSize)
294                    break;
295                if (it->thread != emptytid() && it->thread != gettid()) {
296                    MORDOR_LOG_DEBUG(g_log) << this
297                        << " skipping item scheduled for thread "
298                        << it->thread;
299
300                    // Wake up another thread to hopefully service this
301                    tickleMe = true;
302                    dontIdle = true;
303                    ++it;
304                    continue;
305                }
306                MORDOR_ASSERT(it->fiber || it->dg);
307                // This fiber is still executing; probably just some race
308                // race condition that it needs to yield on one thread
309                // before running on another thread
310                if (it->fiber && it->fiber->state() == Fiber::EXEC) {
311                    MORDOR_LOG_DEBUG(g_log) << this
312                        << " skipping executing fiber " << it->fiber;
313                    ++it;
314                    dontIdle = true;
315                    continue;
316                }
317                // We were just checking if there is more work; there is, so
318                // set the flag and don't actually take this piece of work
319                if (batch.size() == m_batchSize) {
320                    tickleMe = true;
321                    break;
322                }
323                batch.push_back(*it);
324                it = m_fibers.erase(it);
325                if (!isActive) {
326                    ++m_activeThreadCount;
327                    isActive = true;
328                }
329            }
330            if (batch.empty() && isActive) {
331                --m_activeThreadCount;
332                isActive = false;
333            }
334        }
335        if (tickleMe)
336            tickle();
337        MORDOR_LOG_DEBUG(g_log) << this
338            << " got " << batch.size() << " fiber/dgs to process (max: "
339            << m_batchSize << ", active: " << isActive << ")";
340        MORDOR_ASSERT(isActive == !batch.empty());
341
342        if (batch.empty()) {
343            if (dontIdle)
344                continue;
345
346            if (idleFiber->state() == Fiber::TERM) {
347                MORDOR_LOG_DEBUG(g_log) << this << " idle fiber terminated";
348                if (gettid() == m_rootThread)
349                    m_callingFiber.reset();
350                // Unblock the next thread
351                if (threadCount() > 1)
352                    tickle();
353                return;
354            }
355            MORDOR_LOG_DEBUG(g_log) << this << " idling";
356            atomicIncrement(m_idleThreadCount);
357            idleFiber->call();
358            atomicDecrement(m_idleThreadCount);
359            continue;
360        }
361
362        while (!batch.empty()) {
363            FiberAndThread& ft = batch.front();
364            Fiber::ptr f = ft.fiber;
365            boost::function<void ()> dg = ft.dg;
366            batch.pop_front();
367
368            try {
369                if (f && f->state() != Fiber::TERM) {
370                    MORDOR_LOG_DEBUG(g_log) << this << " running " << f;
371                    f->yieldTo();
372                } else if (dg) {
373                    if (dgFiber)
374                        dgFiber->reset(dg);
375                    else
376                        dgFiber.reset(new Fiber(dg));
377                    MORDOR_LOG_DEBUG(g_log) << this << " running " << dg;
378                    dg = NULL;
379                    dgFiber->yieldTo();
380                    if (dgFiber->state() != Fiber::TERM)
381                        dgFiber.reset();
382                    else
383                        dgFiber->reset(NULL);
384                }
385            } catch (...) {
386                try {
387                    MORDOR_LOG_FATAL(Log::root())
388                        << boost::current_exception_diagnostic_information();
389                }
390                catch(...) {
391                    // Swallow any exceptions that might occur while trying to log the current fiber state #98680
392                }
393
394                {
395                    boost::mutex::scoped_lock lock(m_mutex);
396                    // push all un-executed fibers back to m_fibers
397                    copy(batch.begin(), batch.end(), back_inserter(m_fibers));
398                    batch.clear();
399                    // decrease the activeCount as this thread is in exception
400                    isActive = false;
401                    --m_activeThreadCount;
402                }
403                throw;
404            }
405        }
406    }
407}
408
409SchedulerSwitcher::SchedulerSwitcher(Scheduler *target)
410{
411    m_caller = Scheduler::getThis();
412    if (target)
413        target->switchTo();
414}
415
416SchedulerSwitcher::~SchedulerSwitcher()
417{
418    if (m_caller)
419        m_caller->switchTo();
420}
421
422}