/django-workload/django_workload/feed.py

https://github.com/Instagram/django-workload · Python · 215 lines · 155 code · 40 blank · 20 comment · 20 complexity · 3e465dc7e5fba3dd0f20d2b6b1bf4b72 MD5 · raw file

  1. # Copyright 2017-present, Facebook, Inc.
  2. # All rights reserved.
  3. #
  4. # This source code is licensed under the license found in the
  5. # LICENSE file in the root directory of this source tree.
  6. # Async can be introduced gradually. This file contains some async routines
  7. # that are used from a synchronous endpoint.
  8. import asyncio
  9. from .models import FeedEntryModel, UserModel
  10. from .users import suggested_users
  11. def wait_for(coro):
  12. loop = asyncio.get_event_loop()
  13. return loop.run_until_complete(coro)
  14. class Context(object):
  15. """Shared context among async methods"""
  16. def __init__(self, request):
  17. self.endresult = None
  18. self.prepared = None
  19. self.request = request
  20. self.user = self.request.user
  21. def result_for(self, step):
  22. return self.prepared.get(step, None)
  23. class AsyncStep(object):
  24. def __init__(self, context):
  25. self.context = context
  26. async def prepare(self):
  27. """Do work that can be done in parallel"""
  28. pass
  29. @property
  30. def prepared_result(self):
  31. return self.context.result_for(self)
  32. def run(self):
  33. """Execute work in series; all prepare work has completed"""
  34. pass
  35. class Feed(object):
  36. def __init__(self, request):
  37. self.request = request
  38. self.context = None
  39. def feed_page(self):
  40. self.prepare()
  41. self.run()
  42. result = self.post_process(self.context.endresult)
  43. return result
  44. def dup_data(self, item_list, config):
  45. # remove suggestions from items list
  46. items_len = len(item_list)
  47. for i in range(items_len - 1, -1, -1):
  48. if 'entry' not in item_list[i]:
  49. config.sugg_list.append(item_list[i])
  50. item_list.pop(i)
  51. # duplicate the data
  52. for i in range(config.get_mult_factor()):
  53. config.list_extend(item_list)
  54. def sort_data(self, config):
  55. # sort by comment count
  56. s_list = sorted(config.work_list,
  57. key=lambda x: x['entry']['comment_count'],
  58. reverse=True)
  59. # inefficiently bubble sort by time stamp decreasingly
  60. while not config.is_sorted():
  61. items_len = len(s_list)
  62. config.swapped = False
  63. for i in range(items_len - 1):
  64. first = s_list[i]['entry']['published']
  65. second = s_list[i + 1]['entry']['published']
  66. if (first < second):
  67. aux = s_list[i]
  68. s_list[i] = s_list[i + 1]
  69. s_list[i + 1] = aux
  70. config.swapped = True
  71. if not config.swapped:
  72. config.set_sorted(True)
  73. return s_list
  74. def post_process(self, result):
  75. item_list = result['items']
  76. config = FeedConfig()
  77. self.dup_data(item_list, config)
  78. s_list = self.sort_data(config)
  79. # un-duplicate the data
  80. final_items = []
  81. for item in s_list:
  82. exists = False
  83. for final_item in final_items:
  84. if final_item['entry']['pk'] == item['entry']['pk']:
  85. exists = True
  86. break
  87. if not exists:
  88. final_items.append(item)
  89. result['items'] = final_items
  90. result['items'].extend(config.sugg_list)
  91. return result
  92. def prepare(self):
  93. self.context = context = Context(self.request)
  94. self.steps = [
  95. FollowedEntries(context),
  96. SuggestedUsers(context),
  97. Assemble(context),
  98. ]
  99. self.context.prepared = dict(
  100. zip(self.steps, wait_for(self.async_prepare())))
  101. async def async_prepare(self):
  102. return await asyncio.gather(*(s.prepare() for s in self.steps))
  103. def run(self):
  104. for step in self.steps:
  105. step.run()
  106. class FollowedEntries(AsyncStep):
  107. async def prepare(self):
  108. # The Cassandra ORM doesn't offer async support yet, so we'll use a
  109. # thread executor pool instead
  110. def fetch_10_posts(user):
  111. following = user.following
  112. return list(
  113. FeedEntryModel.objects.filter(userid__in=following).limit(10))
  114. def fetch_users(userids):
  115. return {
  116. u.id: u for u in UserModel.objects.filter(id__in=list(userids))}
  117. loop = asyncio.get_event_loop()
  118. entries = await loop.run_in_executor(
  119. None, fetch_10_posts, self.context.user)
  120. userids = {e.userid for e in entries}
  121. usermap = await loop.run_in_executor(
  122. None, fetch_users, userids)
  123. return (entries, usermap)
  124. def run(self):
  125. entries, usermap = self.prepared_result
  126. user = self.context.user
  127. user_info = {id_: user.json_data for id_, user in usermap.items()}
  128. self.context.entries = [
  129. {'entry':{
  130. 'pk': str(e.id),
  131. 'comment_count': e.comment_count,
  132. 'published': e.published.timestamp(),
  133. 'user': user_info[e.userid]
  134. }}
  135. for e in entries]
  136. class SuggestedUsers(AsyncStep):
  137. async def prepare(self):
  138. def fetch_users(userids):
  139. return list(UserModel.objects.filter(id__in=userids))
  140. if len(self.context.user.following) < 25:
  141. # only suggest when this user isn't following so many people yet
  142. userids = suggested_users(self.context.user)
  143. loop = asyncio.get_event_loop()
  144. return await loop.run_in_executor(None, fetch_users, userids)
  145. def run(self):
  146. suggestions = self.prepared_result
  147. if suggestions:
  148. self.context.entries.insert(3, {
  149. 'suggestions': [
  150. user.json_data
  151. for user in suggestions]
  152. })
  153. class Assemble(AsyncStep):
  154. def run(self):
  155. self.context.endresult = {
  156. 'num_results': len(self.context.entries),
  157. 'items': self.context.entries
  158. }
  159. class FeedConfig(object):
  160. def __init__(self):
  161. # Number of times the original items list is duplicated in order
  162. # to make the view more Python intensive
  163. self.mult_factor = 10
  164. self.sorted = False
  165. self.work_list = []
  166. self.sugg_list = []
  167. self.swapped = False
  168. def get_mult_factor(self):
  169. return self.mult_factor
  170. def is_sorted(self):
  171. return self.sorted
  172. def set_sorted(self, val):
  173. self.sorted = val
  174. def list_extend(self, l):
  175. self.work_list.extend(l)