PageRenderTime 62ms CodeModel.GetById 30ms RepoModel.GetById 0ms app.codeStats 0ms

/computer/common/lequipe_fr.py

https://gitlab.com/edelans/scoragora
Python | 180 lines | 163 code | 6 blank | 11 comment | 20 complexity | d0a6098e4d8e5d4cd0fc327508de1bc0 MD5 | raw file
  1. # -*- coding: utf-8 -*-
  2. from sniffer import Sniffer
  3. import requests
  4. import bs4 as BeautifulSoup
  5. import re
  6. import datetime
  7. import multiprocessing as mp
  8. from db.db_connector import DBConnector
  9. from db.team import Team
  10. from db.match import Match
  11. from common.logger import Logger
  12. from bson.objectid import ObjectId
  13. months = {
  14. u'Janvier': 1,
  15. u'F\xc3\xa9vrier': 2,
  16. u'Mars': 3,
  17. u'Avril': 4,
  18. u'Mai': 5,
  19. u'Juin': 6,
  20. u'Juillet': 7,
  21. u'Ao\xc3\xbbt': 8,
  22. u'Septembre': 9,
  23. u'Octobre': 10,
  24. u'Novembre': 11,
  25. u'D\xc3\xa9cembre': 12
  26. }
  27. def parseDate(date):
  28. parts = date.split();
  29. Logger.log(parts)
  30. # Get Day
  31. if (parts[1] == "1er") :
  32. day = 1
  33. else :
  34. day = int(parts[1])
  35. # Get Month
  36. month = months[parts[2]]
  37. # Get Year
  38. year = int(parts[3])
  39. # Get hours
  40. times = parts[4].split('h')
  41. hours = int(times[0])
  42. if (len(times) > 1 and not times[1] == "") :
  43. minutes = int(times[1])
  44. else :
  45. minutes = 0
  46. parsed_date = datetime.datetime(year, month, day, hours, minutes)
  47. return parsed_date
  48. def getMatchDetail(match_id):
  49. url = "http://www.lequipe.fr/Football/match/{0}".format(match_id)
  50. Logger.log(url)
  51. html = requests.get(url).text
  52. soup = BeautifulSoup.BeautifulSoup(html)
  53. # Get date
  54. div_date = soup.select("#placarfore .info_date")
  55. date = None
  56. if (div_date):
  57. str_date = div_date[0].getText()
  58. if (str_date) :
  59. date = parseDate(str_date)
  60. # Get home team
  61. div_hometeam = soup.select("#EqDom .equipe a")
  62. str_hometeam = None
  63. if (div_hometeam):
  64. str_hometeam = div_hometeam[0].getText().strip()
  65. # Get away team
  66. div_awayteam = soup.select("#EqExt .equipe a")
  67. str_awayteam = None
  68. if (div_awayteam):
  69. str_awayteam = div_awayteam[0].getText().strip()
  70. result = {}
  71. if str_awayteam:
  72. result["awayteam"] = str_awayteam
  73. if str_hometeam:
  74. result["hometeam"] = str_hometeam
  75. if date:
  76. result["date"] = date
  77. return result
  78. def getDayMatches(day, day_url):
  79. html = requests.get(day_url).text
  80. soup = BeautifulSoup.BeautifulSoup(html)
  81. result = []
  82. for line in soup.select('[idmatch]'):
  83. # Get match detail
  84. match_id = line.get('idmatch')
  85. detail = getMatchDetail(match_id)
  86. if 'awayteam' in detail and 'hometeam' in detail:
  87. awayteam = Team.retrieveTeamFromLequipe(detail['awayteam'])
  88. if not awayteam:
  89. Team.insertTeamFromLequipe(detail['awayteam'])
  90. awayteam = Team.retrieveTeamFromLequipe(detail['awayteam'])
  91. hometeam = Team.retrieveTeamFromLequipe(detail['hometeam'])
  92. if not hometeam:
  93. Team.insertTeamFromLequipe(detail['hometeam'])
  94. hometeam = Team.retrieveTeamFromLequipe(detail['hometeam'])
  95. # Check if the match already exists
  96. match = Match.retrieveMatchFromTeamsAndCompetition(competition_id, hometeam['_id'], awayteam['_id'])
  97. if match:
  98. # Update lequipe field
  99. Match.updateLequipeFields(match['_id'], match_id)
  100. else:
  101. # Create a new match
  102. Match.insertMatchFromLequipe(competition, hometeam['_id'], awayteam['_id'], match_id)
  103. match = Match.retrieveMatchFromTeamsAndCompetition(competition_id, hometeam['_id'], awayteam['_id'])
  104. result.append(match['_id'])
  105. return result
  106. class LequipeFr(Sniffer):
  107. """Implementation of a sniffer for lequipe.fr"""
  108. def parseChampionship(self, competition_id, url):
  109. competition_id = ObjectId(competition_id)
  110. url = 'http://www.lequipe.fr/Football/ligue-1-resultats.html'
  111. html = requests.get(url).text
  112. soup = BeautifulSoup.BeautifulSoup(html)
  113. root = soup.find(attrs={'name': 'IDNIVEAU'})
  114. if (not root):
  115. print("Snif has failed with the given url : {0}".format(url))
  116. return
  117. days = []
  118. list = root.select('option');
  119. for option in list:
  120. regex = "([0-9]+).*"
  121. regexResult = re.match(regex, option.get_text())
  122. day = int(regexResult.group(1))
  123. day_url = option.get('value')
  124. if (day and day_url):
  125. days.append({'day': day, 'day_url': 'http://www.lequipe.fr{0}'.format(day_url)})
  126. Logger.log("championship number of days : {0}".format(len(days)))
  127. pool = mp.Pool(processes=10)
  128. results = [{'process': pool.apply_async(getDayMatches, (day['day'], day['day_url'])), "day": day['day']} for day in days]
  129. output = [{"matches": p["process"].get(), "day": p["day"]} for p in results]
  130. def parseMatchFinalScore(self):
  131. if self.automation is null:
  132. return {"successful": False, "message": "no automated parameter defined"}
  133. if "lequipe" not in self.automation.keys() or "url" not in self.automation["lequipe"].keys():
  134. return {"successful": False, "message": "no automated parameter defined for lequipe.fr"}
  135. #url = self.automation["lequipe"]["url"]
  136. url = 'http://www.lequipe.fr/Football/match/316316'
  137. html = requests.get(url).text
  138. soup = BeautifulSoup.BeautifulSoup(html)
  139. soup.find(id="timeline")
  140. status = soup.select("#timeline .sco .min")
  141. # Check if the match has ended
  142. if not status[0]['statut'] == "T":
  143. return {"successful": False, "message": "Match not terminated"}
  144. # Get the score
  145. hometeam_score = soup.find(id='scoDom')
  146. awayteam_score = soup.find(id='scoExt')
  147. if (hometeam_score and awayteam_score):
  148. try:
  149. hometeam_score = int(hometeam_score.getText())
  150. awayteam_score = int(awayteam_score.getText())
  151. return {'awayteam_score': awayteam_score, 'hometeam_score': hometeam_score, 'successful': True}
  152. except ValueError:
  153. Logger.log("Impossible to parse the score")
  154. return {"successful": False}
  155. def parseMatchLiveScore(self):
  156. url = 'http://www.lequipe.fr/Football/match/316316'
  157. html = requests.get(url).text
  158. soup = BeautifulSoup.BeautifulSoup(html)
  159. hometeam_score = soup.find(id='scoDom')
  160. awayteam_score = soup.find(id='scoExt')
  161. if (hometeam_score and awayteam_score):
  162. try:
  163. hometeam_score = int(hometeam_score.getText())
  164. awayteam_score = int(awayteam_score.getText())
  165. return {'awayteam_score': awayteam_score, 'hometeam_score': hometeam_score, 'successful': True}
  166. except ValueError:
  167. Logger.log("Impossible to parse the score")
  168. return {"successful": False}