/happypenguin/happypenguin/tome/management/commands/import_hp_dump.py
Python | 384 lines | 378 code | 6 blank | 0 comment | 5 complexity | 8f30356149601e149f8c104d80c1bfb8 MD5 | raw file
- from tome.models import Game, Link, Update
- from django.contrib import admin
- from django.core.management.base import BaseCommand , CommandError
- import sys
- import datetime
- import dateutil.parser
- import shutil
- import os.path
- from django.contrib.auth.models import User
- from datetime import datetime
- from django.conf import settings
- from optparse import make_option
- from tome.util import warning, trace, error
- import json
def new_user(name):
    """Return the active test account called *name*, creating it if needed.

    A newly created account gets the e-mail address ``<name>@test.com`` and
    the throwaway password ``asdf``. An account that already exists is
    returned exactly as stored.
    """
    if User.objects.filter(username=name):
        # Already present: hand back the stored account untouched.
        return User.objects.get(username=name)

    email = "%s@test.com" % name
    account = User.objects.create_user(name, email, "asdf")
    account.is_active = True
    account.save()
    return account
# Usage text: printed by Command.handle when a required input is missing and
# also used as the command's ``help`` string.
USE = (
    "\n"
    "How to use: Download bobz's DB dump and untar it somewhere easy to access. (ie\n"
    "'./data/). Download specify path to download of both gigri's denormalized JSON\n"
    "files and untar to the same location as bobz's DB dump. Specify the path to\n"
    "this folder as the only argument to this command.\n"
)
def stub_user(name, is_staff=False):
    """Return the account called *name*, creating an inactive stub if absent.

    A stub is created without e-mail or password, marked inactive, and given
    the requested ``is_staff`` flag. An existing account is only ever
    promoted: it gains staff status when ``is_staff`` is true, and is never
    demoted when it is false.
    """
    if User.objects.filter(username=name):
        account = User.objects.get(username=name)
        # User approved a post, mark as staff
        if is_staff and not account.is_staff:
            account.is_staff = True
            account.save()
        return account

    account = User.objects.create(username=name)
    account.is_active = False
    account.is_staff = is_staff
    account.save()
    return account
def copy_image(source, just_link=True):
    """Place a screenshot under ``MEDIA_ROOT/screenshots``.

    :param source: path of the screenshot file to import.
    :param just_link: when true, hard-link the file (and leave an existing
        destination alone); when false, copy it, overwriting any existing
        destination.
    :returns: the path of the image relative to ``MEDIA_ROOT``
        (``screenshots/<file name>``).
    :raises OSError: if the destination directory cannot be created, or if
        the link/copy itself fails.
    """
    name = os.path.basename(source)
    dest_dir = os.path.join(settings.MEDIA_ROOT, "screenshots")
    dest = os.path.join(dest_dir, name)

    try:
        os.makedirs(dest_dir)
    except OSError:
        # "Already exists" is the only failure that is safe to ignore; a
        # permission or path problem must surface here rather than as a
        # confusing error from the link/copy below.
        if not os.path.isdir(dest_dir):
            raise

    trace("%s %s -> %s" % ("Linking" if just_link else "Copy", source, dest))

    if just_link:
        # lexists() also catches a dangling symlink occupying the name.
        if not os.path.lexists(dest):
            os.link(source, dest)
    else:
        shutil.copyfile(source, dest)

    return os.path.join("screenshots", name)
def determine_url_type(desc):
    """Guess a ``Link`` type from a link description.

    Matching is plain substring search on *desc* (the caller in this file
    passes it lower-cased). Rules are tried from top to bottom and the first
    hit wins; a description matching nothing is treated as a website.
    """
    def mentions(*keywords):
        # True as soon as any keyword occurs somewhere in the description.
        for keyword in keywords:
            if keyword in desc:
                return True
        return False

    if mentions('git'):
        return Link.GIT
    if mentions('mercurial'):
        return Link.MERCURIAL
    if mentions('repository', 'svn'):
        return Link.REPO
    # 'resource' contains 'source', so it has to be excluded explicitly.
    if 'source' in desc and 'resource' not in desc:
        return Link.SOURCE
    if mentions('dev', 'project page'):
        return Link.CONTRIBUTE
    if mentions('download', 'binary', 'deb', 'rpm', 'rar', 'zip',
                'installer'):
        return Link.DOWNLOAD
    if mentions('wiki'):
        return Link.WIKI
    if mentions('forum'):
        return Link.FORUM
    if mentions('ppa'):
        return Link.PPA
    if mentions('document', 'docs', 'readme', 'faq', 'guide', 'manual',
                'tutorial'):
        return Link.DOCUMENTATION
    if mentions('buy', 'purchase'):
        return Link.PURCHASE
    return Link.WEBSITE
def get_urls(game):
    """Build the list of link dicts for one game record of the dump.

    Each entry of ``game['urls']`` that has a non-empty ``url`` becomes a
    dict with ``type``, ``description`` and ``url`` keys. When the record has
    a ``homepage`` it is appended as a primary website link; otherwise the
    first collected link, if any, is flagged as primary. A warning is logged
    when the game ends up with no links at all.
    """
    links = []
    for entry in game.get('urls', []):
        address = entry.get('url')
        if not address:
            continue
        # A description that is missing or null is treated as empty.
        description = entry.get('description', '') or ''
        links.append({
            'type': determine_url_type(description.lower()),
            'description': description,
            'url': address,
        })

    homepage = game.get('homepage')
    if homepage:
        links.append({
            'url': homepage,
            'description': 'Homepage',
            'type': Link.WEBSITE,
            'primary': True
        })
    elif links:
        links[0]['primary'] = True

    if not links:
        warning("No URLs specified for '%s'" % game.get('title', ''))
    return links
def create_lgt_entry():
    """Create and return the Game entry that stands for the site itself.

    Site-wide news items (updates that name no game) are attached to this
    entry by ``create_update``. The entry is submitted and approved by the
    ``michaelb`` stub account; publish and submission dates are not set here.
    """
    site_owner = stub_user("michaelb")
    entry_fields = dict(
        urls=[],  # later we can add in repo stuff
        title="The Linux Gaming Tome",
        description="Wasting your time since 1995!",
        content="Okay.",
        content_is_free=True,
        engine_is_free=True,
        latest_version='NG-0.1.0',
        user=site_owner,
        approved_by=site_owner,
        # No screenshot for the site entry.
        featured_image=None,
        cost=0.0,
    )
    return Game.create(**entry_fields)
def make_comments(game):
    """Placeholder for importing a game's comments; currently does nothing.

    :param game: one game record of the dump (unused for now).
    :returns: ``None``.
    """
    # TODO: comment import is not implemented yet.
    return None
- def _parse_news_soup(html):
- CL = "<em>Changelog:</em>"
- if CL in html:
- trace("(NEWS) Assuming it contains changelog, splitting by that")
- return html.partition(CL)[-1]
- # Otherwise just return all the HTML
- return html
def create_update(update, lgt_entry):
    """Create an ``Update`` row from one news record of the dump.

    :param update: news record (dict). Keys read: ``game``, ``user``,
        ``newstype``, ``timestamp``, ``news`` and ``headline``.
    :param lgt_entry: the Game entry representing the site itself; records
        that name no game are attached to it.
    :returns: ``True`` when an update was created, ``False`` when the record
        names a game that the search cannot find (a warning is logged).
    """
    game_title = update.get('game')
    if not game_title:
        # Is a site news update, create update in the Linux Gaming Tome's
        # "fake game" page
        game = lgt_entry
    else:
        # Use Game's built in "search" function to find the right title
        matches = Game.objects.search(game_title)
        if not matches:
            warning("Update for '%s' which does not exist" % game_title)
            return False
        game = matches[0]

    # Records with a missing or empty author are attributed to 'bobz'.
    author = stub_user(update.get('user') or 'bobz', is_staff=False)

    # Unknown or missing news types are treated as an initial release.
    update_type = {
        "updated": Update.VERSION,
        "default": Update.NEWS,
        "new": Update.INITIAL_RELEASE,
    }.get(update.get('newstype'), Update.INITIAL_RELEASE)

    pub_date = _parse_date(update, 'timestamp')
    content = _parse_news_soup(update.get('news'))

    Update.objects.create(
        game=game,
        title=update.get('headline'),
        user=author,
        # Pass the mapped update type here; the previous code passed the
        # User object and never used the computed type.
        type=update_type,
        publish_date=pub_date,
        content=content,
    )
    return True
# Fallback date shared by every _parse_date call. Despite its name it holds
# the FIRST date that was parsed successfully and is never updated afterwards.
# NOTE(review): if the true minimum was intended, this needs a comparison.
oldest = None


def _parse_date(d, key):
    """Parse the date stored under *key* of record *d*.

    :param d: record (dict) from the dump.
    :param key: name of the field holding the date string.
    :returns: the parsed ``datetime``. If the field is missing, empty or
        unparsable, the module-level fallback ``oldest`` is returned, which
        is ``None`` until some date has been parsed successfully.
    """
    global oldest
    # A missing or empty value is replaced by a string that cannot parse, so
    # that it takes the same fallback path as a malformed date.
    raw = d.get(key) or "INVALID"
    try:
        pub_date = dateutil.parser.parse(raw)
    except (ValueError, OverflowError):
        # Unparsable date. dateutil documents OverflowError for values that
        # exceed the platform's integer range.
        pub_date = oldest
    if oldest is None:
        oldest = pub_date
    return pub_date
def _normalize_cost(cost):
    """Validate a ``digits[.digits]`` price string and return it normalised.

    The fractional digits are kept verbatim so that cents with a leading
    zero survive ("5.05" stays "5.05"); a price without a fractional part
    gets ".00" appended. Raises ``ValueError`` for anything else.
    """
    whole, dot, cents = cost.partition('.')
    if not whole.isdigit() or (dot and not cents.isdigit()):
        raise ValueError("expected digits[.digits], got %r" % cost)
    return "%d.%s" % (int(whole), cents or "00")


def create_game(game, screenshots_path, just_link):
    """Create a ``Game`` entry from one game record of the dump.

    :param game: game record (dict). Keys read: ``license``, ``screenshot``,
        ``cost``, ``approved_date``, ``date_sumbitted``, ``urls``,
        ``homepage``, ``title``, ``short_description``, ``description``,
        ``version``, ``submitted_by`` and ``approved_by``.
    :param screenshots_path: directory that holds the dump's screenshots.
    :param just_link: forwarded to ``copy_image``; hard-link screenshots
        instead of copying them.
    :returns: ``None``.
    """
    # Freedom status: the dump has a single license field, which is applied
    # to both content and engine.
    is_free = game.get('license') == 'free'

    # Screenshot: imported when the referenced file exists, otherwise the
    # game gets no image.
    image = ''
    if game.get('screenshot'):
        source = os.path.join(screenshots_path, game['screenshot'])
        if os.path.exists(source):
            image = copy_image(source, just_link)
        else:
            warning("Could not find screenshot '%s' " % source)

    # Cost: strip currency decoration, then validate. Anything that is not
    # a plain non-negative decimal number is logged and imported as free.
    cost = (game.get('cost', '') or "").strip("$USD").strip() or "0.00"
    try:
        cost = _normalize_cost(cost)
    except ValueError as e:
        warning("Unusual cost string: %s" % cost)
        trace("received exception: %s" % repr(e))
        cost = "0.00"

    # Dates. Published date (ie, approved date):
    pub_date = _parse_date(game, 'approved_date')
    # Submitted date; the "sumbitted" typo exists in the data.
    sub_date = _parse_date(game, 'date_sumbitted')

    urls = get_urls(game)

    # Get or make user accounts. ``or`` (rather than a .get() default) also
    # covers keys that are present but null or empty, matching create_update.
    user_submitted_by = stub_user(game.get('submitted_by') or 'bobz',
                                  is_staff=False)
    user_approved_by = stub_user(game.get('approved_by') or 'bobz',
                                 is_staff=True)

    Game.create(
        urls=urls,
        title=game.get('title'),
        description=game.get('short_description'),
        content=game.get('description'),
        content_is_free=is_free,
        engine_is_free=is_free,
        latest_version=game.get('version') or '',
        publish_date=pub_date,
        submission_date=sub_date,
        user=user_submitted_by,
        approved_by=user_approved_by,
        # Screenshot:
        featured_image=image,
        cost=cost,
    )


# NOTE(review): nothing visible in this file reads ``text``; it is kept at
# module level in case something else relies on it. Confirm and remove.
text = ''
class Command(BaseCommand):
    """Import games, screenshots and news from the unpacked dump directory.

    The directory given as the only positional argument must contain
    ``games.json``, ``news.json`` and a ``screenshots`` directory.
    """
    args = "<path-to-unzipped-json-dump>"
    option_list = BaseCommand.option_list + (
        make_option('--sample', '-s', dest='sample',
                    action="store_true",
                    help='Stop after the first 20 items'),
        make_option('--copy', '-c', dest='copy',
                    action="store_true",
                    help='Copy screenshots instead of ln (for prod env)'),
    )
    help = USE

    def handle(self, path=None, sample=False, copy=False, **opts):
        """Run the import.

        :param path: directory holding the unpacked dump. It defaults to
            ``None`` so that a missing argument reaches the usage message
            below instead of failing with a ``TypeError`` on the call.
        :param sample: stop each phase (games, news) after 20 imported items.
        :param copy: copy screenshots instead of hard-linking them.
        """
        if not path:
            # print(...) with a single argument behaves the same on Python 2
            # and Python 3.
            print(USE)
            error("Did not specify path")
            # Nothing below can work without a path, whether or not error()
            # itself aborts the process.
            return

        # optional arg to force it to copy as opposed to link
        just_link = not copy

        # Check that the games JSON is where we expect it
        json_path = os.path.join(path, 'games.json')
        trace("Checking for %s..." % json_path)
        if not os.path.exists(json_path):
            print(USE)
            error("%s path does not exist. Make sure "
                  "you download gigri's denormalized "
                  "json." % json_path)

        # Same check for the news JSON
        news_json_path = os.path.join(path, 'news.json')
        trace("Checking for %s..." % news_json_path)
        if not os.path.exists(news_json_path):
            print(USE)
            error("%s path does not exist. Make sure "
                  "you download gigri's denormalized "
                  "json." % news_json_path)

        screenshots_path = os.path.join(path, 'screenshots')
        if not os.path.exists(screenshots_path):
            print(USE)
            error("%s path does not exist. "
                  " Make sure that the screenshot "
                  "directory from bobz's original dump is in the same "
                  "place as the json." % screenshots_path)

        trace("Parsing all of Games DB into memory...")
        with open(json_path) as games_file:
            games = json.load(games_file)

        trace("Creating LGT game entry...")
        lgt_entry = create_lgt_entry()

        # Count from 1 so that --sample stops after exactly 20 games, as the
        # option's help text promises.
        for count, game in enumerate(games, 1):
            create_game(game, screenshots_path, just_link)
            if sample and count >= 20:
                trace("Got to 20, skipping...")
                break

        trace("Parsing all of News DB into memory...")
        with open(news_json_path) as news_file:
            news = json.load(news_file)

        # Only updates that were actually created count towards the sample
        # limit; records for unknown games are skipped without counting.
        imported = 0
        for update in news:
            if create_update(update, lgt_entry):
                imported += 1
            if sample and imported >= 20:
                trace("Got to 20 updates of existing games, skipping...")
                break