PageRenderTime 46ms CodeModel.GetById 19ms RepoModel.GetById 1ms app.codeStats 0ms

/maintainer/locale/Hattrick/Parsers/PlayerPageParser.py

http://foxtrick.googlecode.com/
Python | 70 lines | 54 code | 10 blank | 6 comment | 22 complexity | 63dffac2c8ecf9f519a2214cc132859e MD5 | raw file
Possible License(s): GPL-3.0
  1. #/Club/Players/?TeamID=818875
  2. import HTMLParser
  3. import re
  4. # Parses player entries (name, id, last position) from hattrick sourcecode.
  5. # Used to automatically validate htlang.xml
  6. #
  7. # CatzHoek
  8. class PlayerPageParser(HTMLParser.HTMLParser):
  9. def __init__(self):
  10. HTMLParser.HTMLParser.__init__(self)
  11. self.players = []
  12. self.currentPlayer = {}
  13. #document location identification helpers
  14. self.in_playerInfo_div = False;
  15. self.in_position_span = False;
  16. def getPlayerIdFromUrl(self, url):
  17. pattern = re.compile("\/Club\/Players\/Player.aspx\?(.*)PlayerID=(\d+)", re.I)
  18. match = re.match(pattern, url)
  19. if match and match.group(2):
  20. return match.group(2)
  21. raise Exception("no_player_link", url);
  22. def handle_starttag(self, tag, attrs):
  23. if tag == 'div':
  24. for key, value in attrs:
  25. if key == 'class' and value == 'playerInfo':
  26. self.in_playerInfo_div = True;
  27. if tag == 'a' and self.in_playerInfo_div:
  28. for key, value in attrs:
  29. if key == 'title':
  30. self.currentPlayer["name"] = value
  31. if key == 'href':
  32. try:
  33. id = self.getPlayerIdFromUrl( value )
  34. self.currentPlayer["id"] = id
  35. except Exception:
  36. pass
  37. if tag == 'span' and self.in_playerInfo_div:
  38. for key, value in attrs:
  39. if key == 'class' and value == 'shy':
  40. self.in_position_span = True;
  41. def handle_endtag(self, tag):
  42. if tag == 'div':
  43. if self.in_playerInfo_div:
  44. self.players.append(self.currentPlayer)
  45. self.currentPlayer = {}
  46. self.currentPlayer['lastposition'] = '';
  47. self.in_playerInfo_div = False; #no nested divs in playerinfo, this is okay
  48. if tag == 'span': # no nested spans in position span, this is okay
  49. self.in_position_span = False;
  50. def handle_data(self, data):
  51. if self.in_position_span:
  52. pattern = re.compile('\((.*)\)')
  53. match = re.match(pattern, data)
  54. if match and match.group(1):
  55. self.currentPlayer["lastposition"] = match.group(1)
  56. def get(self):
  57. return self.players