/computer/common/lequipe_fr.py
# -*- coding: utf-8 -*-
from sniffer import Sniffer
import requests
from bs4 import BeautifulSoup
import re
import datetime
import multiprocessing as mp
from db.db_connector import DBConnector
from db.team import Team
from db.match import Match
from common.logger import Logger
from bson.objectid import ObjectId
# French month names as they appear on lequipe.fr
months = {
    u'Janvier': 1,
    u'Février': 2,
    u'Mars': 3,
    u'Avril': 4,
    u'Mai': 5,
    u'Juin': 6,
    u'Juillet': 7,
    u'Août': 8,
    u'Septembre': 9,
    u'Octobre': 10,
    u'Novembre': 11,
    u'Décembre': 12,
}
def parseDate(date):
    """Parse a date string from lequipe.fr, e.g. u'Samedi 1er Février 2014 17h00'."""
    parts = date.split()
    Logger.log(parts)
    # Day: "1er" is the French ordinal used for the first of the month
    if parts[1] == "1er":
        day = 1
    else:
        day = int(parts[1])
    # Month: look up the French month name
    month = months[parts[2]]
    # Year
    year = int(parts[3])
    # Time: "17h00", or "17h" when the minutes are omitted
    times = parts[4].split('h')
    hours = int(times[0])
    if len(times) > 1 and times[1] != "":
        minutes = int(times[1])
    else:
        minutes = 0
    return datetime.datetime(year, month, day, hours, minutes)
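# Example (hypothetical date string, in the format parseDate expects):
#   parseDate(u"Samedi 1er Février 2014 17h00")
#   -> datetime.datetime(2014, 2, 1, 17, 0)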
def getMatchDetail(match_id):
    url = "http://www.lequipe.fr/Football/match/{0}".format(match_id)
    Logger.log(url)
    html = requests.get(url).text
    soup = BeautifulSoup(html, "html.parser")
    # Date of the match
    div_date = soup.select("#placarfore .info_date")
    date = None
    if div_date:
        str_date = div_date[0].getText()
        if str_date:
            date = parseDate(str_date)
    # Home team
    div_hometeam = soup.select("#EqDom .equipe a")
    str_hometeam = None
    if div_hometeam:
        str_hometeam = div_hometeam[0].getText().strip()
    # Away team
    div_awayteam = soup.select("#EqExt .equipe a")
    str_awayteam = None
    if div_awayteam:
        str_awayteam = div_awayteam[0].getText().strip()
    # Only include the fields that were actually found
    result = {}
    if str_awayteam:
        result["awayteam"] = str_awayteam
    if str_hometeam:
        result["hometeam"] = str_hometeam
    if date:
        result["date"] = date
    return result
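# Example return value of getMatchDetail (keys as set above, values
# hypothetical); absent fields are omitted, so callers must test key presence:
#   getMatchDetail("316316")
#   -> {'hometeam': u'Paris-SG', 'awayteam': u'Lyon',
#       'date': datetime.datetime(2014, 2, 1, 17, 0)}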
def getDayMatches(day, day_url, competition_id):
    html = requests.get(day_url).text
    soup = BeautifulSoup(html, "html.parser")
    result = []
    for line in soup.select('[idmatch]'):
        # Fetch the detail page of each match listed for this day
        match_id = line.get('idmatch')
        detail = getMatchDetail(match_id)
        if 'awayteam' in detail and 'hometeam' in detail:
            # Make sure both teams exist in the database
            awayteam = Team.retrieveTeamFromLequipe(detail['awayteam'])
            if not awayteam:
                Team.insertTeamFromLequipe(detail['awayteam'])
                awayteam = Team.retrieveTeamFromLequipe(detail['awayteam'])
            hometeam = Team.retrieveTeamFromLequipe(detail['hometeam'])
            if not hometeam:
                Team.insertTeamFromLequipe(detail['hometeam'])
                hometeam = Team.retrieveTeamFromLequipe(detail['hometeam'])
            # Check if the match already exists
            match = Match.retrieveMatchFromTeamsAndCompetition(competition_id, hometeam['_id'], awayteam['_id'])
            if match:
                # Update the lequipe fields
                Match.updateLequipeFields(match['_id'], match_id)
            else:
                # Create a new match
                Match.insertMatchFromLequipe(competition_id, hometeam['_id'], awayteam['_id'], match_id)
                match = Match.retrieveMatchFromTeamsAndCompetition(competition_id, hometeam['_id'], awayteam['_id'])
            result.append(match['_id'])
    return result
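# getDayMatches returns the ObjectIds of the matches it created or updated,
# e.g. [ObjectId('...'), ...]; the teams and matches themselves are written
# to the database as a side effect.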

class LequipeFr(Sniffer):
    """Implementation of a sniffer for lequipe.fr"""

    def parseChampionship(self, competition_id, url):
        competition_id = ObjectId(competition_id)
        if not url:
            # Default to the Ligue 1 results page
            url = 'http://www.lequipe.fr/Football/ligue-1-resultats.html'
        html = requests.get(url).text
        soup = BeautifulSoup(html, "html.parser")
        root = soup.find(attrs={'name': 'IDNIVEAU'})
        if not root:
            print("Sniff has failed with the given url: {0}".format(url))
            return
        # Each <option> of the day selector maps a day number to its URL
        days = []
        for option in root.select('option'):
            regexResult = re.match("([0-9]+).*", option.get_text())
            if not regexResult:
                continue
            day = int(regexResult.group(1))
            day_url = option.get('value')
            if day and day_url:
                days.append({'day': day, 'day_url': 'http://www.lequipe.fr{0}'.format(day_url)})
        Logger.log("championship number of days : {0}".format(len(days)))
        # Fetch all days in parallel with a pool of ten workers
        pool = mp.Pool(processes=10)
        results = [{'process': pool.apply_async(getDayMatches, (day['day'], day['day_url'], competition_id)), 'day': day['day']} for day in days]
        output = [{'matches': p['process'].get(), 'day': p['day']} for p in results]
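        # One entry per championship day, e.g. (hypothetical ids):
        #   [{'day': 1, 'matches': [ObjectId('...'), ...]}, ...]
        # Returning 'output' is an assumption about the intended API.
        return output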

    def parseMatchFinalScore(self):
        if self.automation is None:
            return {"successful": False, "message": "no automation parameter defined"}
        if "lequipe" not in self.automation or "url" not in self.automation["lequipe"]:
            return {"successful": False, "message": "no automation parameter defined for lequipe.fr"}
        url = self.automation["lequipe"]["url"]
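        # Expected shape of self.automation, inferred from the checks above
        # (the exact structure elsewhere in the codebase is an assumption):
        #   {"lequipe": {"url": "http://www.lequipe.fr/Football/match/316316"}}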
        html = requests.get(url).text
        soup = BeautifulSoup(html, "html.parser")
        # Check if the match has ended: a 'statut' of "T" marks a terminated match
        status = soup.select("#timeline .sco .min")
        if not status or status[0]['statut'] != "T":
            return {"successful": False, "message": "Match not terminated"}

        # Get the final score
        hometeam_score = soup.find(id='scoDom')
        awayteam_score = soup.find(id='scoExt')
        if hometeam_score and awayteam_score:
            try:
                hometeam_score = int(hometeam_score.getText())
                awayteam_score = int(awayteam_score.getText())
                return {'awayteam_score': awayteam_score, 'hometeam_score': hometeam_score, 'successful': True}
            except ValueError:
                Logger.log("Impossible to parse the score")
        return {"successful": False}

    def parseMatchLiveScore(self):
        # NOTE: the match page is still hard-coded to an example match here
        url = 'http://www.lequipe.fr/Football/match/316316'
        html = requests.get(url).text
        soup = BeautifulSoup(html, "html.parser")
        hometeam_score = soup.find(id='scoDom')
        awayteam_score = soup.find(id='scoExt')
        if hometeam_score and awayteam_score:
            try:
                hometeam_score = int(hometeam_score.getText())
                awayteam_score = int(awayteam_score.getText())
                return {'awayteam_score': awayteam_score, 'hometeam_score': hometeam_score, 'successful': True}
            except ValueError:
                Logger.log("Impossible to parse the score")
        return {"successful": False}
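
# Minimal manual smoke test (a sketch; it assumes Sniffer can be instantiated
# without arguments and that lequipe.fr is reachable; the match page is the
# example one hard-coded above):
if __name__ == '__main__':
    sniffer = LequipeFr()
    print(sniffer.parseMatchLiveScore())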