PageRenderTime 42ms CodeModel.GetById 8ms RepoModel.GetById 0ms app.codeStats 0ms

/google_sets.py

http://github.com/neingeist/puddingbrumsel2.0
Python | 32 lines | 23 code | 6 blank | 3 comment | 4 complexity | 215a5c281723e560a96bb7af56d819cf MD5 | raw file
Possible License(s): Apache-2.0
  1. #!/usr/bin/env python
  2. # vim: set fileencoding=utf-8:
  3. import re
  4. import urllib
  5. import urllib2
  6. from BeautifulSoup import BeautifulSoup
  7. class google_sets:
  8. def get_new_words(self, word1, word2, filter = lambda w: True):
  9. url = 'http://labs.google.com/sets'
  10. values = { 'q1' : word1, 'q2' : word2 }
  11. newwords = []
  12. data = urllib.urlencode(values)
  13. req = urllib2.Request(url + "?" + data)
  14. response = urllib2.urlopen(req).read()
  15. soup = BeautifulSoup(response)
  16. # recht schoener html code so, bei google sets
  17. for r in soup.html.body.findAll("font", size=-1, face="Arial, sans-serif"):
  18. word = r.a.contents[0]
  19. if re.compile("^http://www.google.com/search\?hl=en&q=").match(r.a["href"]):
  20. if filter(word):
  21. newwords.append(word)
  22. return newwords
  23. if __name__ == "__main__":
  24. g = google_sets()
  25. print g.get_new_words("rosenkohl", "wirsing",
  26. filter = lambda w: re.compile("^[a-zäöüß]+$").match(w))