PageRenderTime 61ms CodeModel.GetById 27ms RepoModel.GetById 0ms app.codeStats 1ms

/happypenguin/happypenguin/tome/management/commands/import_hp_dump.py

https://bitbucket.org/michaelb/happypenguin-ng
Python | 384 lines | 378 code | 6 blank | 0 comment | 5 complexity | 8f30356149601e149f8c104d80c1bfb8 MD5 | raw file
  1. from tome.models import Game, Link, Update
  2. from django.contrib import admin
  3. from django.core.management.base import BaseCommand , CommandError
  4. import sys
  5. import datetime
  6. import dateutil.parser
  7. import shutil
  8. import os.path
  9. from django.contrib.auth.models import User
  10. from datetime import datetime
  11. from django.conf import settings
  12. from optparse import make_option
  13. from tome.util import warning, trace, error
  14. import json
  15. def new_user(name):
  16. if not User.objects.filter(username=name):
  17. joetest = User.objects.create_user(name,
  18. "%s@test.com"%name, "asdf")
  19. joetest.is_active = True
  20. joetest.save()
  21. else:
  22. joetest = User.objects.get(username=name)
  23. return joetest
  24. USE = """
  25. How to use: Download bobz's DB dump and untar it somewhere easy to access. (ie
  26. './data/). Download specify path to download of both gigri's denormalized JSON
  27. files and untar to the same location as bobz's DB dump. Specify the path to
  28. this folder as the only argument to this command.
  29. """
  30. def stub_user(name, is_staff=False):
  31. if not User.objects.filter(username=name):
  32. new_user = User.objects.create(username=name)
  33. new_user.is_active = False
  34. new_user.is_staff = is_staff
  35. new_user.save()
  36. else:
  37. new_user = User.objects.get(username=name)
  38. # User approved a post, mark as staff
  39. if is_staff and not new_user.is_staff:
  40. new_user.is_staff = True
  41. new_user.save()
  42. return new_user
  43. def copy_image(source, just_link=True):
  44. name = os.path.split(source)[-1]
  45. dest = os.path.join(settings.MEDIA_ROOT, "screenshots", name)
  46. dest_dir = os.path.join(settings.MEDIA_ROOT, "screenshots")
  47. try:
  48. os.makedirs(dest_dir)
  49. except OSError:
  50. pass
  51. trace("%s %s -> %s" % (("Copy", "Linking")[int(just_link)], source, dest))
  52. if just_link:
  53. if not os.path.lexists(dest):
  54. os.link(source, dest)
  55. else:
  56. shutil.copyfile(source, dest)
  57. return os.path.join("screenshots", name)
  58. def determine_url_type(desc):
  59. has = lambda *a: any(map(lambda s: s in desc, a))
  60. if has('git'):
  61. return Link.GIT
  62. if has('mercurial'):
  63. return Link.MERCURIAL
  64. if has('repository', 'svn'):
  65. return Link.REPO
  66. if has('source') and not has('resource'):
  67. return Link.SOURCE
  68. if has('dev', 'project page'):
  69. return Link.CONTRIBUTE
  70. if has('download', 'binary', 'deb', 'rpm', 'rar', 'zip', 'installer'):
  71. return Link.DOWNLOAD
  72. if has('wiki'):
  73. return Link.WIKI
  74. if has('forum'):
  75. return Link.FORUM
  76. if has('ppa'):
  77. return Link.PPA
  78. if has('document', 'docs', 'readme', 'faq', 'guide', 'manual', 'tutorial'):
  79. return Link.DOCUMENTATION
  80. if has('buy', 'purchase'):
  81. return Link.PURCHASE
  82. return Link.WEBSITE
  83. def get_urls(game):
  84. urls = []
  85. for url_data in game.get('urls', []):
  86. r = {}
  87. if not url_data.get('url'):
  88. continue
  89. desc = (url_data.get('description', '') or '')
  90. r['type'] = determine_url_type(desc.lower())
  91. r['description'] = desc
  92. r['url'] = url_data.get('url')
  93. urls.append(r)
  94. if game.get('homepage'):
  95. urls.append({
  96. 'url': game.get('homepage'),
  97. 'description': 'Homepage',
  98. 'type': Link.WEBSITE,
  99. 'primary': True
  100. })
  101. elif urls:
  102. urls[0]['primary'] = True
  103. if not urls:
  104. warning("No URLs specified for '%s'" % game.get('title', ''))
  105. # print "GAME", json.dumps(game, indent=2)
  106. # sys.exit(0)
  107. return urls
  108. def create_lgt_entry():
  109. michaelb = stub_user("michaelb")
  110. return Game.create(
  111. urls=[], # later we can add in repo stuff
  112. title="The Linux Gaming Tome",
  113. description="Wasting your time since 1995!",
  114. content="Okay.",
  115. #website_link=url,
  116. content_is_free=True,
  117. engine_is_free=True,
  118. latest_version='NG-0.1.0',
  119. #publish_date=pub_date,
  120. #submission_date=sub_date,
  121. user=michaelb,
  122. approved_by=michaelb,
  123. # Screenshot:
  124. featured_image=None,
  125. cost=0.0,
  126. )
  127. def make_comments(game):
  128. comments = []
  129. def _parse_news_soup(html):
  130. CL = "<em>Changelog:</em>"
  131. if CL in html:
  132. trace("(NEWS) Assuming it contains changelog, splitting by that")
  133. return html.partition(CL)[-1]
  134. # Otherwise just return all the HTML
  135. return html
  136. def create_update(update, lgt_entry):
  137. game_title = update.get('game')
  138. if not game_title:
  139. # Is a site news update, create update in the Linux Gaming Tome's "fake game" page
  140. game = lgt_entry
  141. else:
  142. # Use Game's built in "search" function to find the right title
  143. game = Game.objects.search(game_title)
  144. if not game:
  145. warning("Update for '%s' which does not exist" % game_title)
  146. return False
  147. game = game[0]
  148. user = stub_user(update.get('user') or 'bobz', is_staff=False)
  149. type = {
  150. "updated": Update.VERSION,
  151. "default": Update.NEWS,
  152. "new": Update.INITIAL_RELEASE,
  153. }.get(update.get('newstype'), Update.INITIAL_RELEASE)
  154. pub_date = _parse_date(update, 'timestamp')
  155. content = _parse_news_soup(update.get('news'))
  156. Update.objects.create(
  157. game=game,
  158. title=update.get('headline'),
  159. user=user,
  160. type=user,
  161. publish_date=pub_date,
  162. content=content,
  163. )
  164. return True
  165. oldest = None
  166. def _parse_date(d, key):
  167. global oldest
  168. # Simple hack to parse a date, but fallback on the oldest date so far parsed
  169. _INV = "INVALID"
  170. try:
  171. pub_date = dateutil.parser.parse(d.get(key, _INV) or _INV)
  172. except ValueError:
  173. # Unparsable date
  174. pub_date = oldest
  175. if not oldest:
  176. oldest = pub_date
  177. return pub_date
  178. def create_game(game, screenshots_path, just_link):
  179. ###############################
  180. # Get freedom status
  181. ###############################
  182. is_free = game.get('license') == 'free'
  183. ###############################
  184. # Handle screenshot
  185. ###############################
  186. if game.get('screenshot'):
  187. source = os.path.join(screenshots_path, game['screenshot'])
  188. # copy over to new location now
  189. if not os.path.exists(source):
  190. warning("Could not find screenshot '%s' " % source)
  191. image = ''
  192. else:
  193. image = copy_image(source, just_link)
  194. else:
  195. image = ''
  196. ###############################
  197. # Parse cost
  198. ###############################
  199. cost = (game.get('cost', '') or "").strip("$USD") or "0.00"
  200. try:
  201. # quick lil validation
  202. cost = "%i.%i" % tuple(map(int, cost.split('.')))
  203. except Exception as e:
  204. warning("Unusual cost string: %s" % cost)
  205. trace("received exception: %s" % repr(e))
  206. cost = "0.00"
  207. ###############################
  208. # Get dates
  209. ###############################
  210. global oldest
  211. # Published date (ie, approved date):
  212. pub_date = _parse_date(game, 'approved_date')
  213. # Submitted dates:
  214. # "sumbitted" typo exists in data-v
  215. sub_date = _parse_date(game, 'date_sumbitted')
  216. ###############################
  217. ###############################
  218. # Get urls
  219. ###############################
  220. urls = get_urls(game)
  221. ###############################
  222. # Get or make user accounts
  223. ###############################
  224. user_submitted_by = stub_user(game.get('submitted_by', 'bobz'), is_staff=False)
  225. user_approved_by = stub_user(game.get('approved_by', 'bobz'), is_staff=True)
  226. ###############################
  227. Game.create(
  228. urls=urls,
  229. title=game.get('title'),
  230. description=game.get('short_description'),
  231. content=game.get('description'),
  232. #website_link=url,
  233. content_is_free=is_free,
  234. engine_is_free=is_free,
  235. latest_version=game.get('version') or '',
  236. publish_date=pub_date,
  237. submission_date=sub_date,
  238. user=user_submitted_by,
  239. approved_by=user_approved_by,
  240. # Screenshot:
  241. featured_image=image,
  242. cost=cost,
  243. )
  244. text = ''
  245. class Command(BaseCommand):
  246. args = "<path-to-unzipped-json-dump>"
  247. option_list = BaseCommand.option_list + (
  248. make_option('--sample', '-s', dest='sample',
  249. action="store_true",
  250. help='Stop after the first 20 items'),
  251. make_option('--copy', '-c', dest='copy',
  252. action="store_true",
  253. help='Copy screenshots instead of ln (for prod env)'),
  254. )
  255. help = USE
  256. def handle(self, path, sample=False, copy=False, **opts):
  257. if not path:
  258. print USE
  259. error("Did not specify path")
  260. # optional arg to force it to copy as opposed to link
  261. just_link = not copy
  262. # First checking if a sqlite DB exists there
  263. json_path = os.path.join(path, 'games.json')
  264. trace("Checking for %s..." % json_path)
  265. if not os.path.exists(json_path):
  266. print USE
  267. error("%s path does not exist. Make sure " % json_path+
  268. "you download gigri's denormalized "+
  269. "json.")
  270. # First checking if a sqlite DB exists there
  271. news_json_path = os.path.join(path, 'news.json')
  272. trace("Checking for %s..." % news_json_path)
  273. if not os.path.exists(news_json_path):
  274. print USE
  275. error("%s path does not exist. Make sure " % news_json_path+
  276. "you download gigri's denormalized "+
  277. "json.")
  278. screenshots_path = os.path.join(path, 'screenshots')
  279. if not os.path.exists(screenshots_path):
  280. print USE
  281. error("%s path does not exist. " % screenshots_path +
  282. " Make sure that the screenshot "+
  283. "directory from bobz's original dump is in the same "+
  284. "place as the json.")
  285. trace("Parsing all of Games DB into memory...")
  286. games = json.load(open(json_path))
  287. trace("Creating LGT game entry...")
  288. lgt_entry = create_lgt_entry()
  289. i = 0
  290. for game in games:
  291. create_game(game, screenshots_path, just_link)
  292. if sample and i > 20:
  293. trace("Got to 20, skipping...")
  294. break
  295. i += 1
  296. trace("Parsing all of News DB into memory...")
  297. news = json.load(open(news_json_path))
  298. i = 0
  299. for update in news:
  300. found = create_update(update, lgt_entry)
  301. if sample and i > 20:
  302. trace("Got to 20 updates of existing games, skipping...")
  303. break
  304. if found:
  305. i += 1