
/fs/btrfs/async-thread.c

https://github.com/mstsirkin/linux
/*
 * Copyright (C) 2007 Oracle. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/kthread.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/freezer.h>
#include "async-thread.h"

#define WORK_QUEUED_BIT 0
#define WORK_DONE_BIT 1
#define WORK_ORDER_DONE_BIT 2
#define WORK_HIGH_PRIO_BIT 3

/*
 * container for the kthread task pointer and the list of pending work
 * One of these is allocated per thread.
 */
struct btrfs_worker_thread {
        /* pool we belong to */
        struct btrfs_workers *workers;

        /* list of struct btrfs_work that are waiting for service */
        struct list_head pending;
        struct list_head prio_pending;

        /* list of worker threads from struct btrfs_workers */
        struct list_head worker_list;

        /* kthread */
        struct task_struct *task;

        /* number of things on the pending list */
        atomic_t num_pending;

        /* reference counter for this struct */
        atomic_t refs;

        unsigned long sequence;

        /* protects the pending list. */
        spinlock_t lock;

        /* set to non-zero when this thread is already awake and kicking */
        int working;

        /* are we currently idle */
        int idle;
};

/*
 * btrfs_start_workers uses kthread_run, which can block waiting for memory
 * for a very long time.  It will actually throttle on page writeback,
 * and so it may not make progress until after our btrfs worker threads
 * process all of the pending work structs in their queue.
 *
 * This means we can't use btrfs_start_workers from inside a btrfs worker
 * thread that is used as part of cleaning dirty memory, which pretty much
 * involves all of the worker threads.
 *
 * Instead we have a helper queue that never has more than one thread, where
 * we schedule thread start operations.  This worker_start struct is used to
 * contain the work and hold a pointer to the queue that needs another
 * worker.
 */
struct worker_start {
        struct btrfs_work work;
        struct btrfs_workers *queue;
};

static void start_new_worker_func(struct btrfs_work *work)
{
        struct worker_start *start;

        start = container_of(work, struct worker_start, work);
        btrfs_start_workers(start->queue, 1);
        kfree(start);
}

static int start_new_worker(struct btrfs_workers *queue)
{
        struct worker_start *start;
        int ret;

        start = kzalloc(sizeof(*start), GFP_NOFS);
        if (!start)
                return -ENOMEM;

        start->work.func = start_new_worker_func;
        start->queue = queue;
        ret = btrfs_queue_worker(queue->atomic_worker_start, &start->work);
        if (ret)
                kfree(start);
        return ret;
}
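
/*
 * Illustrative sketch, not part of the original file: one way a caller
 * might wire up the single-threaded helper queue that start_new_worker()
 * relies on.  All names below are hypothetical; in btrfs the real setup
 * happens at mount time.  The helper queue gets no async helper of its own
 * (NULL), so it always starts threads directly, while the main queue hands
 * thread creation off to the helper via atomic_worker_start.
 */
static void example_setup_queues(struct btrfs_workers *helper,
                                 struct btrfs_workers *main_queue,
                                 int max_workers)
{
        /* helper: a single thread, started synchronously */
        btrfs_init_workers(helper, "worker-helper", 1, NULL);
        btrfs_start_workers(helper, 1);

        /* main queue: thread starts are deferred to the helper */
        btrfs_init_workers(main_queue, "worker", max_workers, helper);
        btrfs_start_workers(main_queue, 1);
}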

/*
 * helper function to move a thread onto the idle list after it
 * has finished some requests.
 */
static void check_idle_worker(struct btrfs_worker_thread *worker)
{
        if (!worker->idle && atomic_read(&worker->num_pending) <
            worker->workers->idle_thresh / 2) {
                unsigned long flags;

                spin_lock_irqsave(&worker->workers->lock, flags);
                worker->idle = 1;

                /* the list may be empty if the worker is just starting */
                if (!list_empty(&worker->worker_list)) {
                        list_move(&worker->worker_list,
                                  &worker->workers->idle_list);
                }
                spin_unlock_irqrestore(&worker->workers->lock, flags);
        }
}

/*
 * helper function to move a thread off the idle list after new
 * pending work is added.
 */
static void check_busy_worker(struct btrfs_worker_thread *worker)
{
        if (worker->idle && atomic_read(&worker->num_pending) >=
            worker->workers->idle_thresh) {
                unsigned long flags;

                spin_lock_irqsave(&worker->workers->lock, flags);
                worker->idle = 0;

                if (!list_empty(&worker->worker_list)) {
                        list_move_tail(&worker->worker_list,
                                       &worker->workers->worker_list);
                }
                spin_unlock_irqrestore(&worker->workers->lock, flags);
        }
}

static void check_pending_worker_creates(struct btrfs_worker_thread *worker)
{
        struct btrfs_workers *workers = worker->workers;
        unsigned long flags;

        rmb();
        if (!workers->atomic_start_pending)
                return;

        spin_lock_irqsave(&workers->lock, flags);
        if (!workers->atomic_start_pending)
                goto out;

        workers->atomic_start_pending = 0;
        if (workers->num_workers + workers->num_workers_starting >=
            workers->max_workers)
                goto out;

        workers->num_workers_starting += 1;
        spin_unlock_irqrestore(&workers->lock, flags);
        start_new_worker(workers);
        return;

out:
        spin_unlock_irqrestore(&workers->lock, flags);
}

static noinline int run_ordered_completions(struct btrfs_workers *workers,
                                            struct btrfs_work *work)
{
        if (!workers->ordered)
                return 0;

        set_bit(WORK_DONE_BIT, &work->flags);

        spin_lock(&workers->order_lock);

        while (1) {
                if (!list_empty(&workers->prio_order_list)) {
                        work = list_entry(workers->prio_order_list.next,
                                          struct btrfs_work, order_list);
                } else if (!list_empty(&workers->order_list)) {
                        work = list_entry(workers->order_list.next,
                                          struct btrfs_work, order_list);
                } else {
                        break;
                }
                if (!test_bit(WORK_DONE_BIT, &work->flags))
                        break;

                /* we are going to call the ordered done function, but
                 * we leave the work item on the list as a barrier so
                 * that later work items that are done don't have their
                 * functions called before this one returns
                 */
                if (test_and_set_bit(WORK_ORDER_DONE_BIT, &work->flags))
                        break;

                spin_unlock(&workers->order_lock);

                work->ordered_func(work);

                /* now take the lock again and call the freeing code */
                spin_lock(&workers->order_lock);
                list_del(&work->order_list);
                work->ordered_free(work);
        }

        spin_unlock(&workers->order_lock);
        return 0;
}
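
/*
 * Illustrative sketch, hypothetical names: how a work item is typically set
 * up for an ordered queue.  'func' may run on any worker and out of order,
 * while run_ordered_completions() above calls 'ordered_func' and then
 * 'ordered_free' strictly in submission order, so the item must only be
 * freed from 'ordered_free'.  Note that nothing in this file sets
 * workers->ordered; the queue's owner flips it to 1 by hand after
 * btrfs_init_workers().
 */
struct example_ordered_item {
        struct btrfs_work work;
        /* per-item payload would live here */
};

static void example_do_work(struct btrfs_work *work)
{
        /* heavy lifting; may run in parallel with other items */
}

static void example_ordered_done(struct btrfs_work *work)
{
        /* completion side effects that must happen in submission order */
}

static void example_ordered_release(struct btrfs_work *work)
{
        kfree(container_of(work, struct example_ordered_item, work));
}

static int example_submit_ordered(struct btrfs_workers *ordered_queue)
{
        struct example_ordered_item *item;

        item = kzalloc(sizeof(*item), GFP_NOFS);
        if (!item)
                return -ENOMEM;

        item->work.func = example_do_work;
        item->work.ordered_func = example_ordered_done;
        item->work.ordered_free = example_ordered_release;
        return btrfs_queue_worker(ordered_queue, &item->work);
}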

static void put_worker(struct btrfs_worker_thread *worker)
{
        if (atomic_dec_and_test(&worker->refs))
                kfree(worker);
}

static int try_worker_shutdown(struct btrfs_worker_thread *worker)
{
        int freeit = 0;

        spin_lock_irq(&worker->lock);
        spin_lock(&worker->workers->lock);
        if (worker->workers->num_workers > 1 &&
            worker->idle &&
            !worker->working &&
            !list_empty(&worker->worker_list) &&
            list_empty(&worker->prio_pending) &&
            list_empty(&worker->pending) &&
            atomic_read(&worker->num_pending) == 0) {
                freeit = 1;
                list_del_init(&worker->worker_list);
                worker->workers->num_workers--;
        }
        spin_unlock(&worker->workers->lock);
        spin_unlock_irq(&worker->lock);

        if (freeit)
                put_worker(worker);
        return freeit;
}

static struct btrfs_work *get_next_work(struct btrfs_worker_thread *worker,
                                        struct list_head *prio_head,
                                        struct list_head *head)
{
        struct btrfs_work *work = NULL;
        struct list_head *cur = NULL;

        if (!list_empty(prio_head))
                cur = prio_head->next;

        smp_mb();
        if (!list_empty(&worker->prio_pending))
                goto refill;

        if (!list_empty(head))
                cur = head->next;

        if (cur)
                goto out;

refill:
        spin_lock_irq(&worker->lock);
        list_splice_tail_init(&worker->prio_pending, prio_head);
        list_splice_tail_init(&worker->pending, head);

        if (!list_empty(prio_head))
                cur = prio_head->next;
        else if (!list_empty(head))
                cur = head->next;
        spin_unlock_irq(&worker->lock);

        if (!cur)
                goto out_fail;

out:
        work = list_entry(cur, struct btrfs_work, list);

out_fail:
        return work;
}

/*
 * main loop for servicing work items
 */
static int worker_loop(void *arg)
{
        struct btrfs_worker_thread *worker = arg;
        struct list_head head;
        struct list_head prio_head;
        struct btrfs_work *work;

        INIT_LIST_HEAD(&head);
        INIT_LIST_HEAD(&prio_head);

        do {
again:
                while (1) {
                        work = get_next_work(worker, &prio_head, &head);
                        if (!work)
                                break;

                        list_del(&work->list);
                        clear_bit(WORK_QUEUED_BIT, &work->flags);

                        work->worker = worker;

                        work->func(work);
                        atomic_dec(&worker->num_pending);
                        /*
                         * unless this is an ordered work queue,
                         * 'work' was probably freed by func above.
                         */
                        run_ordered_completions(worker->workers, work);

                        check_pending_worker_creates(worker);
                }

                spin_lock_irq(&worker->lock);
                check_idle_worker(worker);

                if (freezing(current)) {
                        worker->working = 0;
                        spin_unlock_irq(&worker->lock);
                        refrigerator();
                } else {
                        spin_unlock_irq(&worker->lock);
                        if (!kthread_should_stop()) {
                                cpu_relax();
                                /*
                                 * we've dropped the lock, did someone else
                                 * jump in?
                                 */
                                smp_mb();
                                if (!list_empty(&worker->pending) ||
                                    !list_empty(&worker->prio_pending))
                                        continue;

                                /*
                                 * this short schedule allows more work to
                                 * come in without the queue functions
                                 * needing to go through wake_up_process()
                                 *
                                 * worker->working is still 1, so nobody
                                 * is going to try and wake us up
                                 */
                                schedule_timeout(1);
                                smp_mb();
                                if (!list_empty(&worker->pending) ||
                                    !list_empty(&worker->prio_pending))
                                        continue;

                                if (kthread_should_stop())
                                        break;

                                /* still no more work? sleep for real */
                                spin_lock_irq(&worker->lock);
                                set_current_state(TASK_INTERRUPTIBLE);
                                if (!list_empty(&worker->pending) ||
                                    !list_empty(&worker->prio_pending)) {
                                        spin_unlock_irq(&worker->lock);
                                        set_current_state(TASK_RUNNING);
                                        goto again;
                                }

                                /*
                                 * this makes sure we get a wakeup when someone
                                 * adds something new to the queue
                                 */
                                worker->working = 0;
                                spin_unlock_irq(&worker->lock);

                                if (!kthread_should_stop()) {
                                        schedule_timeout(HZ * 120);
                                        if (!worker->working &&
                                            try_worker_shutdown(worker)) {
                                                return 0;
                                        }
                                }
                        }
                        __set_current_state(TASK_RUNNING);
                }
        } while (!kthread_should_stop());
        return 0;
}

/*
 * this will wait for all the worker threads to shutdown
 */
int btrfs_stop_workers(struct btrfs_workers *workers)
{
        struct list_head *cur;
        struct btrfs_worker_thread *worker;
        int can_stop;

        spin_lock_irq(&workers->lock);
        list_splice_init(&workers->idle_list, &workers->worker_list);
        while (!list_empty(&workers->worker_list)) {
                cur = workers->worker_list.next;
                worker = list_entry(cur, struct btrfs_worker_thread,
                                    worker_list);

                atomic_inc(&worker->refs);
                workers->num_workers -= 1;
                if (!list_empty(&worker->worker_list)) {
                        list_del_init(&worker->worker_list);
                        put_worker(worker);
                        can_stop = 1;
                } else
                        can_stop = 0;
                spin_unlock_irq(&workers->lock);
                if (can_stop)
                        kthread_stop(worker->task);
                spin_lock_irq(&workers->lock);
                put_worker(worker);
        }
        spin_unlock_irq(&workers->lock);
        return 0;
}

/*
 * simple init on struct btrfs_workers
 */
void btrfs_init_workers(struct btrfs_workers *workers, char *name, int max,
                        struct btrfs_workers *async_helper)
{
        workers->num_workers = 0;
        workers->num_workers_starting = 0;
        INIT_LIST_HEAD(&workers->worker_list);
        INIT_LIST_HEAD(&workers->idle_list);
        INIT_LIST_HEAD(&workers->order_list);
        INIT_LIST_HEAD(&workers->prio_order_list);
        spin_lock_init(&workers->lock);
        spin_lock_init(&workers->order_lock);
        workers->max_workers = max;
        workers->idle_thresh = 32;
        workers->name = name;
        workers->ordered = 0;
        workers->atomic_start_pending = 0;
        workers->atomic_worker_start = async_helper;
}

/*
 * starts new worker threads.  This does not enforce the max worker
 * count in case you need to temporarily go past it.
 */
static int __btrfs_start_workers(struct btrfs_workers *workers,
                                 int num_workers)
{
        struct btrfs_worker_thread *worker;
        int ret = 0;
        int i;

        for (i = 0; i < num_workers; i++) {
                worker = kzalloc(sizeof(*worker), GFP_NOFS);
                if (!worker) {
                        ret = -ENOMEM;
                        goto fail;
                }

                INIT_LIST_HEAD(&worker->pending);
                INIT_LIST_HEAD(&worker->prio_pending);
                INIT_LIST_HEAD(&worker->worker_list);
                spin_lock_init(&worker->lock);

                atomic_set(&worker->num_pending, 0);
                atomic_set(&worker->refs, 1);
                worker->workers = workers;
                worker->task = kthread_run(worker_loop, worker,
                                           "btrfs-%s-%d", workers->name,
                                           workers->num_workers + i);
                if (IS_ERR(worker->task)) {
                        ret = PTR_ERR(worker->task);
                        kfree(worker);
                        goto fail;
                }
                spin_lock_irq(&workers->lock);
                list_add_tail(&worker->worker_list, &workers->idle_list);
                worker->idle = 1;
                workers->num_workers++;
                workers->num_workers_starting--;
                WARN_ON(workers->num_workers_starting < 0);
                spin_unlock_irq(&workers->lock);
        }
        return 0;

fail:
        btrfs_stop_workers(workers);
        return ret;
}

int btrfs_start_workers(struct btrfs_workers *workers, int num_workers)
{
        spin_lock_irq(&workers->lock);
        workers->num_workers_starting += num_workers;
        spin_unlock_irq(&workers->lock);
        return __btrfs_start_workers(workers, num_workers);
}

/*
 * run through the list and find a worker thread that doesn't have a lot
 * to do right now.  This can return null if we aren't yet at the thread
 * count limit and all of the threads are busy.
 */
static struct btrfs_worker_thread *next_worker(struct btrfs_workers *workers)
{
        struct btrfs_worker_thread *worker;
        struct list_head *next;
        int enforce_min;

        enforce_min = (workers->num_workers + workers->num_workers_starting) <
                workers->max_workers;

        /*
         * if we find an idle thread, don't move it to the end of the
         * idle list.  This improves the chance that the next submission
         * will reuse the same thread, and maybe catch it while it is still
         * working
         */
        if (!list_empty(&workers->idle_list)) {
                next = workers->idle_list.next;
                worker = list_entry(next, struct btrfs_worker_thread,
                                    worker_list);
                return worker;
        }
        if (enforce_min || list_empty(&workers->worker_list))
                return NULL;

        /*
         * if we pick a busy task, move the task to the end of the list.
         * hopefully this will keep things somewhat evenly balanced.
         * Do the move in batches based on the sequence number.  This groups
         * requests submitted at roughly the same time onto the same worker.
         */
        next = workers->worker_list.next;
        worker = list_entry(next, struct btrfs_worker_thread, worker_list);
        worker->sequence++;

        if (worker->sequence % workers->idle_thresh == 0)
                list_move_tail(next, &workers->worker_list);
        return worker;
}

/*
 * selects a worker thread to take the next job.  This will either find
 * an idle worker, start a new worker up to the max count, or just return
 * one of the existing busy workers.
 */
static struct btrfs_worker_thread *find_worker(struct btrfs_workers *workers)
{
        struct btrfs_worker_thread *worker;
        unsigned long flags;
        struct list_head *fallback;

again:
        spin_lock_irqsave(&workers->lock, flags);
        worker = next_worker(workers);

        if (!worker) {
                if (workers->num_workers + workers->num_workers_starting >=
                    workers->max_workers) {
                        goto fallback;
                } else if (workers->atomic_worker_start) {
                        workers->atomic_start_pending = 1;
                        goto fallback;
                } else {
                        workers->num_workers_starting++;
                        spin_unlock_irqrestore(&workers->lock, flags);
                        /* we're below the limit, start another worker */
                        __btrfs_start_workers(workers, 1);
                        goto again;
                }
        }
        goto found;

fallback:
        fallback = NULL;
        /*
         * we have failed to find any workers, just
         * return the first one we can find.
         */
        if (!list_empty(&workers->worker_list))
                fallback = workers->worker_list.next;
        if (!list_empty(&workers->idle_list))
                fallback = workers->idle_list.next;
        BUG_ON(!fallback);
        worker = list_entry(fallback,
                            struct btrfs_worker_thread, worker_list);
found:
        /*
         * this makes sure the worker doesn't exit before it is placed
         * onto a busy/idle list
         */
        atomic_inc(&worker->num_pending);
        spin_unlock_irqrestore(&workers->lock, flags);
        return worker;
}

/*
 * btrfs_requeue_work just puts the work item back on the tail of the list
 * it was taken from.  It is intended for use with long running work functions
 * that make some progress and want to give the cpu up for others.
 */
int btrfs_requeue_work(struct btrfs_work *work)
{
        struct btrfs_worker_thread *worker = work->worker;
        unsigned long flags;
        int wake = 0;

        if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
                goto out;

        spin_lock_irqsave(&worker->lock, flags);
        if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags))
                list_add_tail(&work->list, &worker->prio_pending);
        else
                list_add_tail(&work->list, &worker->pending);
        atomic_inc(&worker->num_pending);

        /* by definition we're busy, take ourselves off the idle
         * list
         */
        if (worker->idle) {
                spin_lock(&worker->workers->lock);
                worker->idle = 0;
                list_move_tail(&worker->worker_list,
                               &worker->workers->worker_list);
                spin_unlock(&worker->workers->lock);
        }
        if (!worker->working) {
                wake = 1;
                worker->working = 1;
        }

        if (wake)
                wake_up_process(worker->task);
        spin_unlock_irqrestore(&worker->lock, flags);
out:
        return 0;
}
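
/*
 * Illustrative sketch, hypothetical names: the pattern btrfs_requeue_work()
 * is meant for.  A long running work function does one bounded chunk of its
 * job, requeues itself on the same worker, and returns, instead of hogging
 * the worker thread until the whole job is done.
 */
struct example_long_job {
        struct btrfs_work work;
        int chunks_left;
};

static void example_long_running_func(struct btrfs_work *work)
{
        struct example_long_job *job;

        job = container_of(work, struct example_long_job, work);

        /* do one bounded chunk of the job here */
        job->chunks_left--;

        if (job->chunks_left > 0) {
                /* more to do: go back on the tail of this worker's list */
                btrfs_requeue_work(work);
                return;
        }
        kfree(job);
}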

void btrfs_set_work_high_prio(struct btrfs_work *work)
{
        set_bit(WORK_HIGH_PRIO_BIT, &work->flags);
}

/*
 * places a struct btrfs_work into the pending queue of one of the kthreads
 */
int btrfs_queue_worker(struct btrfs_workers *workers, struct btrfs_work *work)
{
        struct btrfs_worker_thread *worker;
        unsigned long flags;
        int wake = 0;

        /* don't requeue something already on a list */
        if (test_and_set_bit(WORK_QUEUED_BIT, &work->flags))
                goto out;

        worker = find_worker(workers);
        if (workers->ordered) {
                /*
                 * you're not allowed to do ordered queues from an
                 * interrupt handler
                 */
                spin_lock(&workers->order_lock);
                if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags)) {
                        list_add_tail(&work->order_list,
                                      &workers->prio_order_list);
                } else {
                        list_add_tail(&work->order_list, &workers->order_list);
                }
                spin_unlock(&workers->order_lock);
        } else {
                INIT_LIST_HEAD(&work->order_list);
        }

        spin_lock_irqsave(&worker->lock, flags);

        if (test_bit(WORK_HIGH_PRIO_BIT, &work->flags))
                list_add_tail(&work->list, &worker->prio_pending);
        else
                list_add_tail(&work->list, &worker->pending);
        check_busy_worker(worker);

        /*
         * avoid calling into wake_up_process if this thread has already
         * been kicked
         */
        if (!worker->working)
                wake = 1;
        worker->working = 1;

        if (wake)
                wake_up_process(worker->task);
        spin_unlock_irqrestore(&worker->lock, flags);

out:
        return 0;
}
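
/*
 * Illustrative sketch, not part of the btrfs API: the basic lifecycle of a
 * queue built from this file.  The names and the payload are made up for
 * the example; real callers configure their queues at mount time and tear
 * them down at unmount.
 */
struct example_job {
        struct btrfs_work work;
        int payload;
};

static struct btrfs_workers example_queue;

static void example_job_func(struct btrfs_work *work)
{
        struct example_job *job = container_of(work, struct example_job, work);

        pr_info("example job ran, payload %d\n", job->payload);
        kfree(job);
}

static int example_setup(void)
{
        /* up to 4 threads, no async start helper, unordered */
        btrfs_init_workers(&example_queue, "example", 4, NULL);
        return btrfs_start_workers(&example_queue, 1);
}

static int example_submit(int payload)
{
        struct example_job *job;

        job = kzalloc(sizeof(*job), GFP_NOFS);
        if (!job)
                return -ENOMEM;

        job->payload = payload;
        job->work.func = example_job_func;
        /* optionally mark it urgent before queueing: */
        /* btrfs_set_work_high_prio(&job->work); */
        return btrfs_queue_worker(&example_queue, &job->work);
}

static void example_teardown(void)
{
        /* once no more work will be submitted */
        btrfs_stop_workers(&example_queue);
}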