/google_sets.py
Python | 32 lines | 23 code | 6 blank | 3 comment | 4 complexity | 215a5c281723e560a96bb7af56d819cf MD5 | raw file
Possible License(s): Apache-2.0
- #!/usr/bin/env python
- # vim: set fileencoding=utf-8:
- import re
- import urllib
- import urllib2
- from BeautifulSoup import BeautifulSoup
- class google_sets:
- def get_new_words(self, word1, word2, filter = lambda w: True):
- url = 'http://labs.google.com/sets'
- values = { 'q1' : word1, 'q2' : word2 }
- newwords = []
- data = urllib.urlencode(values)
- req = urllib2.Request(url + "?" + data)
- response = urllib2.urlopen(req).read()
- soup = BeautifulSoup(response)
- # recht schoener html code so, bei google sets
- for r in soup.html.body.findAll("font", size=-1, face="Arial, sans-serif"):
- word = r.a.contents[0]
- if re.compile("^http://www.google.com/search\?hl=en&q=").match(r.a["href"]):
- if filter(word):
- newwords.append(word)
- return newwords
- if __name__ == "__main__":
- g = google_sets()
- print g.get_new_words("rosenkohl", "wirsing",
- filter = lambda w: re.compile("^[a-zäöüß]+$").match(w))