PageRenderTime 50ms CodeModel.GetById 22ms RepoModel.GetById 0ms app.codeStats 0ms

/creepy/flickr.py

https://github.com/Br3nda/creepy
Python | 204 lines | 164 code | 7 blank | 33 comment | 0 complexity | 8ce461f6e5317d53bd880229fb7cb9ee MD5 | raw file
  1. '''
  2. Copyright 2010 Yiannis Kakavas
  3. This file is part of creepy.
  4. creepy is free software: you can redistribute it and/or modify
  5. it under the terms of the GNU General Public License as published by
  6. the Free Software Foundation, either version 3 of the License, or
  7. (at your option) any later version.
  8. creepy is distributed in the hope that it will be useful,
  9. but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. GNU General Public License for more details.
  12. You should have received a copy of the GNU General Public License
  13. along with creepy. If not, see <http://www.gnu.org/licenses/>.
  14. '''
  15. import urllib
  16. import flickrapi
  17. from flickrapi.exceptions import FlickrError
  18. import re
  19. from BeautifulSoup import BeautifulSoup as bs
  20. class Flickr():
  21. """
  22. Wrapper class for the Flickr API.
  23. provides functionality for user search and information retrieval
  24. """
  25. def __init__(self, conf_file):
  26. self.api = flickrapi.FlickrAPI(conf_file['flickr']['api_key'])
  27. self.photo_dir = conf_file['directories']['profilepics_dir']
  28. def search_real_name(self, input):
  29. """
  30. Search user by real name
  31. Provides search function by real name. This is not provided by flickr API so
  32. it needs to be done the old(html-scrapping) way.
  33. Returns a list of user dictionaries
  34. """
  35. html = urllib.urlopen("http://www.flickr.com/search/people/?see=none&q=" + input + "&m=names").read()
  36. '''
  37. Removing some javascript that choked BeautifulSoup's parser
  38. '''
  39. html = re.sub("(?is)(<script[^>]*>)(.*?)(</script>)", "", html)
  40. soup = bs(html)
  41. id = []
  42. username = []
  43. name = []
  44. for r in soup.findAll('h2'):
  45. id_temp = r.a['href'].replace("/photos/", "")[:-1]
  46. if re.match(r'[\d]+@[A-Z][\d]+', id_temp):
  47. id.append(id_temp)
  48. else:
  49. id.append(self.getid_from_name(r.a.string))
  50. username.append(r.a.string)
  51. try:
  52. name.append(r.next.next.next.next.b.string)
  53. except Exception:
  54. name.append("")
  55. pics = [p.img['src'] for p in soup.findAll(attrs={"class":"Icon"})]
  56. user_list = zip(id, username, name, pics)
  57. users = []
  58. for user in user_list:
  59. try:
  60. temp_file = '%sprofile_pic_%s' % (self.photo_dir, user[0])
  61. urllib.urlretrieve(user[3], temp_file)
  62. except Exception, err:
  63. pass
  64. #print 'Error retrieving %s profile picture' % (user[1]), err
  65. users.append({'id':user[0], 'username': user[1], 'realname':user[2], 'location':'' })
  66. return users
  67. def search_user(self, input):
  68. """
  69. Wrapper to the search function provided by flickr API
  70. Returns a list of user dictionaries
  71. """
  72. if re.match("[\w\-\.+]+@(\w[\w\-]+\.)+[\w\-]+", input):
  73. try:
  74. results = self.api.people_findByEmail(find_email=input)
  75. except FlickrError:
  76. return
  77. else:
  78. try:
  79. results = self.api.people_findByUsername(username=input)
  80. except FlickrError, err:
  81. #print 'Error from flickr api ', err
  82. return
  83. if results.attrib['stat'] == "ok":
  84. user_list = []
  85. #print results.find('user')
  86. for i in results.find('user').items():
  87. user_list.append(self.get_user_info(i[1]))
  88. return user_list
  89. def getid_from_name(self, username):
  90. """
  91. Gets user's nsid from flickr
  92. Returns user's nsid
  93. """
  94. try:
  95. result = self.api.people_findByUsername(username=username)
  96. return result.find('user').attrib['nsid']
  97. except FlickrError, err:
  98. return
  99. def get_user_info(self, id):
  100. """
  101. Retrieves a user's username, real name and location as provided by flickr API
  102. Returns a user dictionary
  103. """
  104. results = self.api.people_getInfo(user_id=id)
  105. if results.attrib['stat'] == 'ok':
  106. user = {'id':id, 'username':'', 'realname':'', 'location':''}
  107. res = results.find('person')
  108. user['username'] = res.find('username').text
  109. if res.find('realname'):
  110. user['realname'] = res.find('realname').text
  111. if res.find('location'):
  112. user['location'] = res.find('location').text
  113. return user
  114. def get_user_photos(self, id, page_nr):
  115. """
  116. Retrieves a users public photos.
  117. Authentication and retrieval of protected photos is not yet implemented
  118. Returns a list with all the photos
  119. """
  120. try:
  121. results = self.api.people_getPublicPhotos(user_id=id, extras="geo, date_taken", per_page=500, page=page_nr)
  122. if results.attrib['stat'] == 'ok':
  123. return results.find('photos').findall('photo')
  124. except Exception , err:
  125. conn_err = {'from':'flickr', 'tweetid':'', 'url': 'flickr' ,'error':err.message}
  126. self.errors.append(conn_err)
  127. def get_locations(self, photos):
  128. """
  129. Determines location information from a list of photos
  130. Extracts the geo data provided by flickr API and returns a
  131. dictionary of locations
  132. """
  133. locations = []
  134. if photos:
  135. for photo in photos:
  136. if photo.attrib['latitude'] != '0':
  137. loc = {}
  138. loc['context'] = ('http://www.flickr.com/photos/%s/%s' % (photo.attrib['owner'], photo.attrib['id']), 'Photo from flickr \n Title : %s \n ' % (photo.attrib['title']))
  139. loc['time'] = photo.attrib['datetaken']
  140. loc['latitude'] = photo.attrib['latitude']
  141. loc['longitude'] = photo.attrib['longitude']
  142. loc['accuracy'] = photo.attrib['accuracy']
  143. locations.append(loc)
  144. return locations
  145. def return_locations(self, id):
  146. """
  147. Wrapper function for the location retrieval.
  148. Returns all the locations detected from the user's photos
  149. """
  150. self.errors = []
  151. locations_list = []
  152. result_params = {}
  153. try:
  154. results = self.api.people_getPublicPhotos(user_id=id, extras="geo, date_taken", per_page=500)
  155. if results.attrib['stat'] == 'ok':
  156. res = results.find('photos')
  157. total_photos = res.attrib['total']
  158. pages = int(res.attrib['pages'])
  159. #print "pages :" + str(pages) + " , total :" + total_photos
  160. if pages > 1:
  161. for i in range(1, pages + 1, 1):
  162. locations_list.extend(self.get_locations(self.get_user_photos(id, i)))
  163. else:
  164. locations_list.extend(self.get_locations(results.find('photos').findall('photo')))
  165. except FlickrError, err:
  166. conn_err = {'from':'flickr_photos', 'tweetid':'', 'url': 'flickr' ,'error':err.message}
  167. self.errors.append(conn_err)
  168. result_params['flickr_errors'] = self.errors
  169. return (locations_list, result_params)