/resolve.py
Python | 232 lines | 186 code | 12 blank | 34 comment | 23 complexity | aa8db09e72ddc4708d8005e276c92f14 MD5 | raw file
- # Wikipedia citation tools
- # resolve.py
- # Resolvers
- from bs4 import BeautifulSoup as bs
- from isbnlib import mask
- import utils as u
def doi0(doi):
    """
    Look up a DOI on the serialssolutions.com citation page.

    The page's ``.citation-data`` label cells (``.span3``) are paired with
    their value cells (``.span9``) and the value stored under the
    ``'Journal:'`` label is returned.

    Returns a ``(journal, None)`` tuple; the second slot (publisher) is
    never filled in by this resolver. ``(None, None)`` is returned when the
    request, the parsing or the label lookup fails.
    """
    escaped_doi = doi.replace('/', '%2F')
    url = "http://su8bj7jh4j.search.serialssolutions.com/?id=DOI".replace('DOI', escaped_doi)
    # print('URL: ' + url)
    try:
        page = bs(u.safeget(url).text, 'html5lib')
        labels = page.select(".section .citation-data .span3")
        values = page.select(".section .citation-data .span9")
        # Build the label -> value table one row at a time.
        citation_data = {}
        for label, value in zip(labels, values):
            citation_data[label.text.strip()] = value.text.strip()
        return citation_data['Journal:'], None
    except:
        return None, None
-
def doi1(doi):
    """
    Resolve a DOI to a title through the CrossRef search endpoint.

    The endpoint's JSON answer is expected to be a list of matches; the
    first match's ``fullCitation`` HTML snippet is parsed and the text of
    its first ``<i>`` element is returned as the title.

    Returns ``(title, None)``; the publisher slot is never filled in.
    ``(None, None)`` is returned when there is no match or when the
    request or parsing fails.
    """
    # Imported locally because ``json`` is not among the module-level
    # imports visible at the top of this file.
    import json

    url = "http://search.crossref.org/dois?q=DOI".replace('DOI', doi.replace('/', '%2F'))
    # print('URL: ' + url)
    try:
        # Decode the response body (``.text``), as every other resolver in
        # this file does; passing the response object itself to json.loads
        # can never succeed.
        matches = json.loads(u.safeget(url).text)
        if not matches:
            return None, None
        citation = bs(matches[0]['fullCitation'], 'html5lib')
        return citation.select('i')[0].text, None
    except Exception:
        # Best-effort lookup: any failure means "not resolved", but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        return None, None
- # WorldCat Advanced search lookup
def isbn0(isbn):
    """
    Resolve an ISBN to ``(title, publisher)`` via a WorldCat advanced search.

    The title is the text of the first ``.name a strong`` element on the
    results page and the publisher is cut out of the ``itemPublisher`` span
    (the part after ``': '`` and before the first comma).

    Returns ``(title, publisher)``; ``publisher`` is ``None`` when it
    cannot be extracted, and ``(None, None)`` is returned when no usable
    title is found or the request fails.
    """
    url = "https://www.worldcat.org/search?q=bn%3AISBN&qt=advanced&dblist=638".replace('ISBN', isbn)
    # print('URL: ' + url)
    try:
        soup = bs(u.safeget(url).text, 'html5lib')
        title = soup.select(".name a strong")[0].text
    except Exception:
        return None, None

    # A "title" containing "ISBN" is treated as a non-result
    # (presumably the page echoing the query -- confirm against the site).
    if "ISBN" in title:
        return None, None

    # Extract the publisher separately so that a missing or oddly formatted
    # publisher span no longer throws away a title that was found.
    try:
        publisher = soup.find("span", class_="itemPublisher").text.split(': ')[1].split(',')[0]
    except (AttributeError, IndexError):
        publisher = None
    return title, publisher or None
- # Bookfinder API
- # http://www.bookfinder.com/search/?isbn=ISBN&st=xl&ac=qr
def isbn1(isbn):
    """
    Resolve an ISBN to ``(title, publisher)`` via a BookFinder search page.

    The title comes from the page ``<title>`` (text before the first
    ``' ('`` and before ``' by '``); the publisher is the text of the
    element carrying ``itemprop="publisher"``, up to the first comma.

    Returns ``(title, publisher)``; ``publisher`` is ``None`` when it
    cannot be extracted, and ``(None, None)`` is returned when the page
    title is empty, looks like an error page, or the request fails.
    """
    url = "http://www.bookfinder.com/search/?isbn=ISBN&st=xl&ac=qr".replace('ISBN', isbn)
    # print('URL: ' + url)
    try:
        # Must go through the utils module: a bare ``safeget`` is not
        # defined in this file and made every lookup fail silently.
        soup = bs(u.safeget(url).text, 'html5lib')
        title = soup.title.text.strip().split(' (')[0]
    except Exception:
        return None, None

    # Page titles containing any of these markers are not book titles.
    rejected_markers = ('Search error', 'ISBN', 'BookFinder.com: Forbidden')
    if not title or any(marker in title for marker in rejected_markers):
        return None, None

    # A missing publisher element must not discard the title.
    try:
        publisher = soup.find(itemprop="publisher").text.split(',')[0]
    except AttributeError:
        publisher = None
    return title.split(' by ')[0], publisher or None
- # Isbnsearch API
- # http://www.isbnsearch.org/isbn/ISBN
def isbn2(isbn):
    """
    Resolve an ISBN to ``(title, publisher)`` via isbnsearch.org.

    The title is the second ``'|'``-separated part of the page ``<title>``,
    cut at the first ``'('`` and stripped. The publisher is taken from the
    first ``<p>`` inside the first ``.bookinfo`` element whose text
    contains ``"Publisher"`` (the part after ``': '``).

    Returns ``(title, publisher)``; ``publisher`` is ``None`` when it
    cannot be extracted, and ``(None, None)`` is returned when the title
    cannot be extracted or the request fails.
    """
    url = "http://www.isbnsearch.org/isbn/ISBN".replace('ISBN', isbn)
    # print('URL: ' + url)
    try:
        soup = bs(u.safeget(url).text, 'html5lib')
        title = soup.title.text.split('|')[1].split('(')[0].strip()
    except Exception:
        return None, None

    # Look for the publisher separately so that a page without a
    # "Publisher" line still yields the title.
    publisher = None
    try:
        for paragraph in soup.select(".bookinfo")[0].find_all('p'):
            if "Publisher" in paragraph.text:
                publisher = paragraph.text.split(': ')[1]
                break
    except IndexError:
        publisher = None
    return title, publisher or None
- # Open Library API
- # http://openlibrary.org/api/books?bibkeys=ISBN:ISBN&details=true
def isbn3(isbn):
    """
    Resolve an ISBN to ``(title, publishers)`` via the Open Library books API.

    The response is expected in the form ``var _OLBookInfo = {...};``; the
    JavaScript wrapper is stripped and the remainder parsed as JSON. The
    record is looked up under the key ``'ISBN:<isbn>'``.

    Returns ``(title, publishers)`` where ``publishers`` is the record's
    ``details['publishers']`` value passed through unchanged
    (NOTE(review): presumably a list rather than a single string -- confirm
    callers expect that), or ``None`` when absent. ``(None, None)`` is
    returned when the title is missing or the request/parsing fails.
    """
    # Imported locally because ``json`` is not among the module-level
    # imports visible at the top of this file.
    import json

    # Build the key explicitly: replacing every 'ISBN' in a URL template
    # would also clobber the literal "ISBN:" prefix the API requires.
    bibkey = 'ISBN:' + isbn
    url = "http://openlibrary.org/api/books?bibkeys=" + bibkey + "&details=true"
    # print('URL: ' + url)
    try:
        content = u.safeget(url).text
        # Drop the "var _OLBookInfo = " prefix and the trailing ';',
        # tolerating surrounding whitespace.
        payload = content.replace('var _OLBookInfo = ', '').strip().rstrip(';')
        details = json.loads(payload)[bibkey]['details']
        return details['title'], details.get('publishers') or None
    except Exception:
        return None, None
- # Amazon Advanced Search lookup
def isbn4(isbn):
    """
    Resolve an ISBN to a title via an Amazon advanced book search.

    The title is the text of the first ``.s-access-detail-page h2``
    element on the results page.

    Returns ``(title, None)``; the publisher is never filled in because,
    per the original author's note, reading the book's detail page requires
    API access. ``(None, None)`` is returned when the lookup fails, matching
    the other resolvers in this file.
    """
    url = "http://www.amazon.com/gp/search/ref=sr_adv_b/?search-alias=stripbooks&unfiltered=1&field-isbn=ISBN&field-dateop=During&sort=relevanceexprank&Adv-Srch-Books-Submit.x=22&Adv-Srch-Books-Submit.y=5"
    url = url.replace('ISBN', isbn)
    # print('URL: ' + url)
    try:
        soup = bs(u.safeget(url).text, 'html5lib')
        return soup.select('.s-access-detail-page h2')[0].text, None
    except Exception:
        # Was ``False, None``; the "not found" marker is None everywhere
        # else in this module (both values are falsy).
        return None, None
def pmid0(pmid):
    """
    Look up a PMID with the NCBI eutils ``esummary`` endpoint (db=pubmed).

    Returns ``(source, None)`` where ``source`` is the text of the ``item``
    element whose ``name`` attribute is ``"Source"``; the publisher slot is
    always ``None``. ``(None, None)`` is returned on any failure.
    """
    endpoint = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=pubmed&id=PMID"
    url = endpoint.replace('PMID', pmid)
    # print('URL: ' + url)
    try:
        response = u.safeget(url)
        summary = bs(response.text, 'html5lib')
        source_item = summary.find('item', {'name': "Source"})
        return source_item.text, None
    except:
        return None, None
-
def pmc0(pmc):
    """
    Look up a PMC id with the NCBI eutils ``esummary`` endpoint (db=pmc).

    Returns ``(source, None)`` where ``source`` is the text of the ``item``
    element whose ``name`` attribute is ``"Source"``; the publisher slot is
    always ``None``. ``(None, None)`` is returned on any failure.
    """
    url = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi?db=pmc&id=PMC".replace('PMC', pmc)
    # print('URL: ' + url)
    try:
        markup = u.safeget(url).text
        return bs(markup, 'html5lib').find('item', {'name': "Source"}).text, None
    except:
        return None, None
-
def arxiv0(arxiv):
    """
    Resolve an arXiv id by way of the DOI advertised on its abstract page.

    The DOI is read from the ``citation_doi`` meta tag of
    ``arxiv.org/abs/<id>`` and handed to ``doi0``; when that yields no
    first element, the result of ``doi1`` is returned instead.

    Returns whatever the DOI resolver returns, or ``(None, None)`` when
    the page cannot be fetched or carries no DOI.
    """
    url = "http://arxiv.org/abs/ARXIV".replace('ARXIV', arxiv)
    # print('URL: ' + url)
    try:
        abstract_page = bs(u.safeget(url).text, 'html5lib')
        doi_meta = abstract_page.find('meta', {'name': "citation_doi"})
        doi = doi_meta['content']
        first_try = doi0(doi)
        return first_try if first_try[0] else doi1(doi)
    except:
        return None, None
-
def arxiv1(arxiv):
    """
    Resolve an arXiv id by way of the DOI in the arXiv export API answer.

    The text of the ``arxiv:doi`` element in the
    ``export.arxiv.org/api/query`` response is passed to ``doi0``.

    Returns the result of ``doi0``, or ``(None, None)`` when the request
    fails or the response has no such element.
    """
    query_url = "http://export.arxiv.org/api/query?id_list=ARXIV".replace('ARXIV', arxiv)
    # print('URL: ' + query_url)
    try:
        feed = bs(u.safeget(query_url).text, 'html5lib')
        doi_element = feed.find('arxiv:doi')
        # print('Found doi:', doi_element.text)
        return doi0(doi_element.text)
    except:
        return None, None
def is_academic(isbn):
    """
    Check an ISBN against the Thomson Reuters Web of Knowledge Book Master
    List, used to decide whether an ISBN is "scientific" or not.

    Search form: http://wokinfo.com/cgi-bin/bkci/search.cgi
    Example matching ISBN: 978-0-415-59181-2
    Note (from the original author): Probably needs minimum -s 5 to work!

    Returns ``True`` when the first cell of the second ``tbody`` does not
    contain the "No matches to your query." message, ``False`` when it
    does, and ``None`` when the request or parsing fails.
    """
    try:
        response = u.safepost(
            "http://wokinfo.com/cgi-bin/bkci/search.cgi",
            data={'search': mask(isbn), 'searchtype': 'and'},
        )
        result_tables = bs(response.text, 'html5lib').select("tbody")
        first_cell_text = result_tables[1].select("td")[0].text.rstrip()
        return 'No matches to your query.' not in first_cell_text
    except:
        return None