PageRenderTime 42ms CodeModel.GetById 13ms RepoModel.GetById 0ms app.codeStats 0ms

/projects/tibstat/tibiacom.py

https://code.google.com/p/anacrolix/
Python | 358 lines | 254 code | 44 blank | 60 comment | 39 complexity | 422a9e72cacb5c401916ccbe97b99c5a MD5 | raw file
  1. #!/usr/bin/env python
  2. import calendar, collections, htmlentitydefs, httplib, logging, os.path, pdb, pprint, re, string, sys, threading, time, urllib, urllib2, urlparse
  3. #_TIBIA_TIME_STRLEN = len("Mon DD YYYY, HH:MM:SS TZ")
  4. Killer = collections.namedtuple("Killer", ("isplayer", "name"))
  5. CharDeath = collections.namedtuple("CharDeath", ("time", "level", "killers"))
  6. class Character(object):
  7. _fields = ("name", "vocation", "level", "guild")
  8. def __init__(self, **kwargs):
  9. super(self.__class__, self).__init__()
  10. self.__data = {}
  11. for k, v in kwargs.items():
  12. self[k] = v
  13. def __setitem__(self, key, value):
  14. assert key in self._fields
  15. assert key not in self.__data
  16. self.__data[key] = value
  17. def __getattr__(self, name):
  18. if name in self._fields:
  19. return self.__data.get(name)
  20. else:
  21. raise AttributeError()
  22. def update(self, other):
  23. vars(self).update(vars(other))
  24. def is_online(self):
  25. return self.online
  26. def set_online(self, online, stamp):
  27. if not hasattr(self, "online"):
  28. self.last_status_change = stamp if online else 0
  29. elif online != self.online:
  30. self.last_status_change = stamp
  31. self.online = online
  32. def last_online(self):
  33. assert not self.online
  34. return self.last_status_change
  35. def last_offline(self):
  36. assert self.online
  37. return self.last_status_change
  38. def __ne__(self, other):
  39. return not self == other
  40. def __eq__(self, other):
  41. for a in self._fields:
  42. if getattr(self, a) != getattr(other, a):
  43. return False
  44. else:
  45. return True
  46. def __repr__(self):
  47. return "{0}({1})".format(self.__class__.__name__, ", ".join(("{0}={1!r}".format(f, getattr(self, f)) for f in self._fields)))
  48. def tibia_time_to_unix(s):
  49. a = s.split()
  50. #b = time.strptime(" ".join(a[:-1]) + " UTC", "%b %d %Y, %H:%M:%S %Z")
  51. b = time.strptime(" ".join(a[:-1]), "%b %d %Y, %H:%M:%S")
  52. c = calendar.timegm(b)
  53. #c = int(time.mktime(b))
  54. #c -= time.timezone
  55. HOUR = 3600
  56. c -= HOUR * {"CET": 1, "CEST": 2}[a[-1]]
  57. return c
  58. def next_whoisonline_update(secs=None):
  59. """Returns the unix epoch of the next reasonable time to update from tibia.com whoisonline pages"""
  60. a = list(time.gmtime(secs))
  61. min = (((((a[4] - 1) // 5) + 1) * 5) + 1)
  62. return calendar.timegm(a[0:4] + [min, 0] + a[6:9])
  63. def make_tibia_url(path, query):
  64. return urlparse.urlunsplit((
  65. "http",
  66. "www.tibia.com",
  67. path,
  68. urllib.urlencode(query),
  69. None),)
  70. def char_page_url(name):
  71. return make_tibia_url("/community/", {"subtopic": "characters", "name": name})
  72. def world_guilds_url(world):
  73. return make_tibia_url("/community/", {"subtopic": "guilds", "world": world})
  74. def guild_members_url(guild):
  75. return make_tibia_url("/community/", {"subtopic": "guilds", "page": "view", "GuildName": guild})
  76. def world_online_url(world):
  77. assert isinstance(world, str)
  78. return make_tibia_url("/community/", {"subtopic": "whoisonline", "world": world})
  79. def http_get(url):
  80. """Perform a compressed GET request on the Tibia webserver. Return the decoded response data and other info."""
  81. # prefer deflate, zlib, gzip as they add ascending levels of headers in that order
  82. # compress is not as strong compression, prefer it next
  83. # avoid identity and unhandled encodings at all cost
  84. request = urllib2.Request(
  85. url,
  86. headers={"Accept-Encoding": "deflate;q=1.0, zlib;q=0.9, gzip;q=0.8, compress;q=0.7, *;q=0"},)
  87. retries = 0
  88. while True:
  89. try:
  90. response = urllib2.urlopen(request)
  91. except urllib2.HTTPError as e:
  92. if e.code != 403:
  93. raise
  94. else:
  95. retries += 1
  96. print "fail:", time.time(), threading.current_thread().name
  97. time.sleep(retries)
  98. else:
  99. break
  100. del retries
  101. respdata = response.read()
  102. response.close()
  103. assert response.code == 200, (response.code, respdata)
  104. # decompress the response data
  105. assert len(respdata) == int(response.info()["Content-Length"])
  106. contentEncoding = response.info()["Content-Encoding"]
  107. if contentEncoding == "gzip":
  108. import gzip, io
  109. respdata = gzip.GzipFile(fileobj=io.BytesIO(respdata)).read()
  110. # retrieve the encoding, so we can decode the bytes to a string
  111. contentType = response.info()["Content-Type"]
  112. charset = re.search("charset=([^;\b]+)", contentType).group(1)
  113. if str != bytes:
  114. respdata = respdata.decode(charset)
  115. return respdata, response.info()
  116. #tibiacomConnection = httplib.HTTPConnection("www.tibia.com")
  117. #tibiacomConnLock = threading.Lock()
  118. #def http_get(url):
  119. #"""Perform a compressed GET request on the Tibia webserver. Return the decoded response data and other info."""
  120. ## prefer deflate, zlib, gzip as they add ascending levels of headers in that order
  121. ## compress is not as strong compression, prefer it next, avoid identity and unhandled encodings at all cost
  122. #with tibiacomConnLock:
  123. #tibiacomConnection.request("GET", url, headers={
  124. #"Accept-Encoding": "deflate;q=1.0, zlib;q=0.9, gzip;q=0.8, compress;q=0.7, *;q=0",})
  125. #response = tibiacomConnection.getresponse()
  126. #assert response.status == 200
  127. ##if response.status != 200:
  128. ## logging.error("%d: %s", response.status, response.reason)
  129. ##else:
  130. ## break
  131. ##print response.status, response.reason
  132. ##print response.getheaders()
  133. ###assert response.status == 200
  134. ## decompress the response data
  135. #respdata = response.read()
  136. ##response.close()
  137. #contentLength = response.msg["Content-Length"]
  138. #assert len(respdata) == int(response.msg["Content-Length"])
  139. #contentEncoding = response.msg.getheader("Content-Encoding")
  140. #if contentEncoding == "gzip":
  141. #import gzip, io
  142. #respdata = gzip.GzipFile(fileobj=io.BytesIO(respdata)).read()
  143. ## retrieve the encoding, so we can decode the bytes to a string
  144. #contentType = response.msg["Content-Type"]
  145. #charset = re.search("charset=([^;\b]+)", contentType).group(1)
  146. #if str != bytes:
  147. #respdata = respdata.decode(charset)
  148. #return respdata, response.msg
# Regex fragments that _parse_char_page_deaths concatenates into a single
# pattern matching one row of the "Character Deaths" table.
STR = r"<tr[^>]*>"  # opening <tr> tag with any attributes
ETR = r"</tr>"  # closing </tr> tag
STD = r"<td[^>]*>"  # opening <td> tag with any attributes
ETD = r"</td>"  # closing </td> tag
TIBTIME = r"([^<]+)"  # capture: cell text up to the next tag (the death time)
LEVEL = r".*?at Level (\d+) by "  # capture: the level following "at Level"
KILLERS = r"(.*?)"  # capture: the rest of the cell, i.e. the killer list
  156. def tibia_worlds_url():
  157. return make_tibia_url("/community/", {"subtopic": "whoisonline"})
  158. def parse_tibia_worlds(html):
  159. worldNames = set()
  160. for mo in re.finditer(
  161. r'<A HREF="http://www.tibia.com/community/\?subtopic=whoisonline&world=([^"]+)">', html):
  162. worldNames.add(mo.group(1))
  163. return worldNames
  164. def tibia_worlds():
  165. html = http_get(tibia_worlds_url())[0]
  166. return parse_tibia_worlds(html)
  167. def dilute_tibia_html_entities(html):
  168. return html.replace("\xa0", " ")
  169. def unescape_tibia_html(string):
  170. """Replace HTML entities in the string, and convert any Tibia-specific codepoints"""
  171. n2cp = htmlentitydefs.name2codepoint
  172. def substitute_entity(match):
  173. ent = match.group(2)
  174. if match.group(1) == "#":
  175. # numeric substitution
  176. return chr(int(ent))
  177. else:
  178. # get the codepoint from the name
  179. cp = n2cp.get(ent)
  180. if cp:
  181. #if a codepoint was found, return it's string value
  182. return chr(cp)
  183. else:
  184. # codepoint wasn't found, return the match untouched
  185. return match.group()
  186. def decode_entities(string):
  187. # catch any &(#)(12345); or &()(abcdefgh);
  188. entity_re = re.compile(r"&(#?)(\d{1,5}|\w{1,8});")
  189. # substitute all matches in the string with the result of the function call
  190. return entity_re.subn(substitute_entity, string)[0]
  191. # replace nbsp, there may be others too..
  192. return dilute_tibia_html_entities(decode_entities(string))
  193. def _parse_char_page_deaths(html):
  194. matchobj = re.search(r"<table.*?>Character Deaths<.*?</table>", html, re.IGNORECASE)
  195. if not matchobj:
  196. return []
  197. html = matchobj.group()
  198. #print(deathhtml)
  199. alldeaths = []
  200. for deathmo in re.finditer(STR + STD + TIBTIME + ETD + STD + LEVEL + KILLERS + ETD + ETR, html, re.IGNORECASE):
  201. time = unescape_tibia_html(deathmo.group(1))
  202. tibia_time_to_unix(time)
  203. level = int(deathmo.group(2))
  204. killers = []
  205. for killermo in re.finditer(r"(?:<a[^>]*>)?([^<]+?)(</a>)?(?: and |, |\.)", deathmo.group(3)):
  206. assert killermo.re.groups == 2
  207. killers.append(Killer(isplayer=bool(killermo.group(2)), name=unescape_tibia_html(killermo.group(1))))
  208. alldeaths.append(CharDeath(time=time, level=level, killers=killers))
  209. #pprint.pprint(alldeaths)
  210. return alldeaths
  211. class CharDoesNotExist(Exception):
  212. pass
  213. def parse_char_page(html, name):
  214. info = {}
  215. """Read the simple FieldName: Value data fields from character page HTML"""
  216. FIELDS = (
  217. # key name, field name, data transform, required field
  218. ("name", "Name", lambda x: unescape_tibia_html(x), True),
  219. ("former_names", "Former Names", lambda x: x.split(", "), False),
  220. ("sex", "Sex", None, True),
  221. ("vocation", "Profession", None, True),
  222. ("level", "Level", int, True),
  223. ("world", "World", None, True),
  224. ("residence", "Residence", None, True),
  225. ( "guild",
  226. "Guild&#160;membership",
  227. lambda x: unescape_tibia_html(re.search(r"<A.+?>([^<]+)</A>", x).group(1)),
  228. False),
  229. ("last_login", "Last login", lambda x: unescape_tibia_html(x), True),
  230. ("account_status", "Account&#160;Status", None, True),
  231. ("created", "Created", lambda x: unescape_tibia_html(x), False))
  232. for key, field, transform, required in FIELDS:
  233. hits = re.findall(
  234. "<TR.*?><TD.*?>{0}:</TD><TD>(.+?)</TD></TR>".format(field),
  235. html)
  236. if len(hits) == 0:
  237. if required:
  238. if re.search(
  239. r"Character <B>{0}</B> does not exist.".format(name),
  240. html):
  241. assert key == FIELDS[0][0]
  242. raise CharDoesNotExist(name)
  243. else:
  244. assert False, "Character page for %r: Required field %s not found" % (name, field)
  245. else:
  246. info[key] = None
  247. elif len(hits) == 1:
  248. info[key] = transform(hits[0]) if transform != None else hits[0]
  249. else:
  250. assert False, "Too many hits for field"
  251. info["deaths"] = _parse_char_page_deaths(html)
  252. # special name field handling
  253. a = info["name"].split(",", 1)
  254. info["name"] = a[0]
  255. if len(a) > 1:
  256. info["deletion"] = a[1].strip()
  257. if name != info["name"]:
  258. assert name in info["former_names"]
  259. #raise CharNameChanged(info)
  260. return info
  261. def get_char_page(name):
  262. return parse_char_page(http_get(char_page_url(name))[0], name)
  263. def get_world_online(world):
  264. return parse_world_online(http_get(world_online_url(world))[0])
  265. def parse_world_online(html):
  266. assert html
  267. # html is processed in this function, these are data transformations
  268. def vocation_check(vocation):
  269. assert vocation in ("None", "Knight", "Elite Knight", "Paladin", "Royal Paladin", "Druid", "Elder Druid", "Sorcerer", "Master Sorcerer")
  270. return vocation
  271. FIELDS = (
  272. ("name", lambda x: unescape_tibia_html(x)),
  273. ("level", lambda x: int(x)),
  274. ("vocation", vocation_check),)
  275. players = []
  276. row_re = re.compile("""<TR BGCOLOR=#[A-F0-9]+><TD WIDTH=\d+%><A HREF="http://www.tibia.com/community/\?subtopic=characters&name=[^"]+">([^<]+)</A></TD><TD WIDTH=\d+%>(\d+)</TD><TD WIDTH=\d+%>([^<]+)</TD></TR>""")
  277. for a in row_re.finditer(html):
  278. assert len(a.groups()) == len(FIELDS)
  279. # generate a dict from a list of pairs, with keys and values processed through FIELDS
  280. # FIELDS is iterated by index because of 1-based MatchObject.group()
  281. players.append(Character(**dict(
  282. ((f[0], f[1](a.group(i + 1))) for i, f in enumerate(FIELDS)))))
  283. # check the page player count matches the number of players we found
  284. pagePlayerCount = re.search(r"Currently (\d+) players are online\.", html)
  285. if pagePlayerCount is not None:
  286. pagePlayerCount = int(pagePlayerCount.group(1))
  287. else:
  288. pagePlayerCount = 0
  289. assert len(players) == pagePlayerCount
  290. return players
  291. def parse_world_guilds(html, world):
  292. groups = re.findall(r'<INPUT TYPE=hidden NAME=GuildName VALUE="([^"]+)', html)
  293. retval = set()
  294. for guild in [set([x]) for x in groups]:
  295. assert len(guild) == 1
  296. assert retval.isdisjoint(guild)
  297. retval.update(guild)
  298. return retval
  299. def parse_guild_members(html, guild):
  300. members = set()
  301. matches = re.findall(r'<TD><A HREF="http://www.tibia.com/community/\?subtopic=characters&name=[^"]+">([^<]+)</A>[^<>]*</TD>', html)
  302. for m in matches:
  303. assert m not in members
  304. members.add(unescape_tibia_html(m))
  305. return members
  306. def main():
  307. pprint.pprint(globals()[sys.argv[1]](*sys.argv[2:]))
  308. if __name__ == "__main__":
  309. main()