google_sets.py | searchcode

/google_sets.py

http://github.com/neingeist/puddingbrumsel2.0
Python | 32 lines | 23 code | 6 blank | 3 comment | 4 complexity | 215a5c281723e560a96bb7af56d819cf MD5 | raw file
Possible License(s): Apache-2.0


#!/usr/bin/env python
# vim: set fileencoding=utf-8:
import re
import urllib
import urllib2
from BeautifulSoup import BeautifulSoup

class google_sets:
  """Small scraper for the Google Sets page at labs.google.com/sets."""

  def get_new_words(self, word1, word2, filter = lambda w: True):
    """Return the words Google Sets lists for the seed pair word1/word2.

    Args:
      word1: first seed word, sent as query parameter ``q1``.
      word2: second seed word, sent as query parameter ``q2``.
      filter: predicate called with each candidate word; only words for
        which it returns a true value are kept. Defaults to accepting
        every word.

    Returns:
      A list of the accepted words in page order. The list is empty when
      the page has no <html>/<body> or contains no result links.

    Raises:
      Whatever ``urllib2.urlopen`` raises when the request fails.
    """
    url = 'http://labs.google.com/sets'
    values = { 'q1' : word1, 'q2' : word2 }
    newwords = []

    data = urllib.urlencode(values)
    req = urllib2.Request(url + "?" + data)
    conn = urllib2.urlopen(req)
    try:
      response = conn.read()
    finally:
      # Always release the connection, even if reading fails.
      conn.close()

    soup = BeautifulSoup(response)
    html = soup.html
    body = html.body if html is not None else None
    if body is None:
      # Nothing parseable came back (e.g. an empty response).
      return newwords

    # Result entries link to a Google search for the suggested word.
    # Compiled once, outside the loop, as a raw string with literal dots.
    result_href = re.compile(r"^http://www\.google\.com/search\?hl=en&q=")

    # The Google Sets markup is "rather pretty": results are only
    # recognisable as <font> tags carrying exactly these attributes.
    for r in body.findAll("font", size=-1, face="Arial, sans-serif"):
      link = r.a
      if link is None or not link.contents:
        # A matching <font> without a usable <a> is not a result entry.
        continue
      if not result_href.match(link.get("href", "")):
        continue
      word = link.contents[0]
      if filter(word):
        newwords.append(word)

    return newwords

if __name__ == "__main__":
  g = google_sets()
  print g.get_new_words("rosenkohl", "wirsing", 
                        filter = lambda w: re.compile("^[a-zäöüß]+$").match(w))