
100+ results for 'beautifulsoup' (187 ms)

registerbot.py https://gitlab.com/Desikan/botebc | Python | 296 lines
    11: from PIL import ImageDraw
    12: from bs4 import BeautifulSoup
    13:
    93:         raw_html = self.browser.response().read()
    94:         soup = BeautifulSoup(raw_html)
    95:
    149:            mechanize object.'''
    150:         soup = BeautifulSoup(self.response)
    151:         formcount=0
    165:            mechanize object.'''
    166:         soup = BeautifulSoup(self.response)
    167:         formcount=0
    180:         ''' Displays all the links from the current browser page.'''
    181:         soup = BeautifulSoup(raw_html)
    182:         categories_table_obj = soup.find("table", attrs={"id":'my_table'})

google_soup.py https://github.com/chudler/Community-Zenpacks.git | Python | 221 lines
    12: import logging.handlers
    13: from BeautifulSoup import BeautifulSoup
    14: from ClientForm import *
    83:     def findApplications(self):
    84:         main_apps = BeautifulSoup(self.resetClient())
    85:         column_headers = []
    87:         for table in main_apps.findAll('table', limit=1):
    88:             # table is a BeautifulSoup.Tag object
    89:             column_headers = self.extract_headings(table)
    148:             quota_details = re.sub(quota_fix, '', quota_details)
    149:             quota_soup = BeautifulSoup(quota_details)
    150:             quota_section = quota_soup.find(attrs={'id':'ae-quota-details'})
    168:         app_main = open('/tmp/dashboard.html', 'r').read()
    169:         app_soup = BeautifulSoup(app_main)
    170:         load_section = app_soup.find(text=re.compile('Current Load'))

util.py https://github.com/sunlightlabs/muni_words.git | Python | 198 lines
    8: from django.contrib.gis.geos import Point
    9: from BeautifulSoup import BeautifulSoup, BeautifulStoneSoup
    10: from excludes import EXCLUDED
    40: def strip_html(string):
    41:     return ''.join([e for e in BeautifulSoup(string).recursiveChildGenerator() if isinstance(e, unicode)]).replace(' ', ' ')

testFunctional.py https://github.com/bogtan/Naaya.git | Python | 213 lines
    21: from unittest import TestSuite, makeSuite
    22: from BeautifulSoup import BeautifulSoup
    23:
    148:         html = self.browser.get_html()
    149:         soup = BeautifulSoup(html)
    150:

_htmlparser.py https://github.com/yoheia/yoheia.git | Python | 265 lines
    53:
    54: class BeautifulSoupHTMLParser(HTMLParser):
    55:     def handle_starttag(self, name, attrs):
    163:         args, kwargs = self.parser_args
    164:         parser = BeautifulSoupHTMLParser(*args, **kwargs)
    165:         parser.soup = self.soup
    169:             warnings.warn(RuntimeWarning(
    170:                 "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
    171:             raise e
    198: """, re.VERBOSE)
    199:     BeautifulSoupHTMLParser.locatestarttagend = locatestarttagend
    200:
    261:
    262:     BeautifulSoupHTMLParser.parse_starttag = parse_starttag
    263:     BeautifulSoupHTMLParser.set_cdata_mode = set_cdata_mode

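A note on the warning quoted above: it asks the user to install an external parser and name it when building the soup. A minimal sketch of what that looks like with bs4 (assuming lxml and html5lib have been installed; html.parser ships with Python):

    from bs4 import BeautifulSoup

    html = "<p>Some <b>unclosed markup"
    # "html.parser" is built in; "lxml" and "html5lib" are the external
    # parsers the warning recommends (pip install lxml html5lib).
    for features in ("html.parser", "lxml", "html5lib"):
        soup = BeautifulSoup(html, features)
        print(features, "->", soup.find("b"))
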
main.py https://github.com/gtracy/APODEmail.git | Python | 249 lines
    18:
    19: from BeautifulSoup import BeautifulSoup, Tag
    20:
    184:
    185:      soup = BeautifulSoup(result.content)
    186:      #logging.debug(soup)

TV3Scrapper.py http://xbmc-vodie.googlecode.com/svn/trunk/ | Python | 254 lines
    9: import sys
    10: from BeautifulSoup import SoupStrainer, MinimalSoup as BeautifulSoup, BeautifulStoneSoup
    11: import urllib, urllib2

TVSeriesUtil.py http://xbmc-vodie.googlecode.com/svn/trunk/ | Python | 224 lines
    9: import sys
    10: from BeautifulSoup import SoupStrainer, MinimalSoup as BeautifulSoup, BeautifulStoneSoup
    11: import urllib, urllib2, cookielib

hackerrankops.py https://gitlab.com/j000sh/hackerrank-to-git | Python | 200 lines
    9: import logging # TODO get rid of these print statements!
    10: from bs4 import BeautifulSoup
    11:
    173:         return
    174:     csrfHtml = BeautifulSoup(r.text, 'html.parser').find(id = 'csrf-token')
    175:     if csrfHtml:

diagnose.py https://gitlab.com/Rheinhart/csuchen-Guard | Python | 216 lines
    8: import bs4
    9: from bs4 import BeautifulSoup, __version__
    10: from bs4.builder import builder_registry
    69:         try:
    70:             soup = BeautifulSoup(data, parser)
    71:             success = True
    178:             a = time.time()
    179:             soup = BeautifulSoup(data, parser)
    180:             b = time.time()
    207:     vars = dict(bs4=bs4, data=data, parser=parser)
    208:     cProfile.runctx('bs4.BeautifulSoup(data, parser)', vars, vars, filename)
    209:

default.py http://seppius-xbmc-repo.googlecode.com/svn/trunk/ | Python | 406 lines
    32:
    33: from BeautifulSoup import BeautifulSoup, BeautifulStoneSoup
    34: import socket
    108:
    109:         beautifulSoup = BeautifulSoup(http)
    110:         userPanel = beautifulSoup.find('a', {"id": "loginlink"})
    171:
    172:     beautifulSoup = BeautifulSoup(http)
    173:     content = beautifulSoup.find('div', attrs={'id': 'dle-content'})
    243:
    244:     beautifulSoup = BeautifulSoup(http)
    245:     categoryContainer = beautifulSoup.find('ul', 'cats')
    273:
    274:     beautifulSoup = BeautifulSoup(http)
    275:     tagsContainer = beautifulSoup.find('td', 'news')

jobs.py git://pkgs.fedoraproject.org/sugar-read | Python | 310 lines
    26: import os.path
    27: import BeautifulSoup
    28:
    71:     def _searchfile(self, fileobj):
    72:         soup = BeautifulSoup.BeautifulSoup(fileobj)
    73:         body = soup.find('body')

test_converter_unittest.py https://gitlab.com/x33n/phantomjs | Python | 319 lines
    34: from webkitpy.common.system.outputcapture import OutputCapture
    35: from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup
    36: from webkitpy.w3c.test_converter import W3CTestConverter
    185:
    186:         doc = BeautifulSoup(test_html)
    187:         oc = OutputCapture()
    266:         try:
    267:             converted = converter.convert_prefixed_properties(BeautifulSoup(test_content[1]), DUMMY_FILENAME)
    268:         finally:
    281:         if isinstance(converted, basestring):
    282:             converted = BeautifulSoup(converted)
    283:

mastodon.scm https://gitlab.com/daym/guix | Scheme | 174 lines
    60:     (inputs
    61:      `(("python-beautifulsoup4" ,python-beautifulsoup4)
    62:        ("python-requests" ,python-requests)

browser.py https://bitbucket.org/chahaojia/eve_moniter.git | Python | 236 lines
    87:         import BeautifulSoup
    88:         return BeautifulSoup.BeautifulSoup(self.data)
    89:

get_legislation.py https://github.com/jdunck/fiftystates.git | Python | 112 lines
    4: import datetime as dt
    5: from BeautifulSoup import BeautifulSoup
    6:
    18:
    19:         # Get the details page and parse it with BeautifulSoup. These
    20:         # pages contain a malformed 'p' tag that (certain versions of)
    23:         details_raw = details_raw.replace('<P ALIGN=CENTER">', '')
    24:         details = BeautifulSoup(details_raw)
    25:
    26:         # Get the history page (following a link from the details page).
    27:         # Once again, we remove tags that BeautifulSoup chokes on
    28:         # (including all meta tags, because bills with quotation marks
    35:         history_raw = rem_meta.sub('</title></head>', history_raw)
    36:         history = BeautifulSoup(history_raw)
    37:

lec_04_scraping.ipynb https://gitlab.com/xbsd/content | Jupyter | 379 lines
    23:       "from pattern import web\n",
    24:       "from BeautifulSoup import BeautifulSoup"
    25:      ],
    261:      "source": [
    262:       "# Using BeautifulSoup"
    263:      ]
    268:      "input": [
    269:       "bs = BeautifulSoup(r.text)\n",
    270:       "for movie in bs.findAll('td', 'title'):\n",

ford.rb https://gitlab.com/0072016/homebrew-core | Ruby | 153 lines
    25:
    26:   resource "beautifulsoup4" do
    27:     url "https://pypi.python.org/packages/26/79/ef9a8bcbec5abc4c618a80737b44b56f1cb393b40238574078c5002b97ce/beautifulsoup4-4.4.1.tar.gz"
    67:     ENV.prepend_create_path "PYTHONPATH", libexec/"vendor/lib/python2.7/site-packages"
    68:     deps = %w[beautifulsoup4 graphviz Jinja2 Markdown markdown-include MarkupSafe Pygments toposort]
    69:     deps << "lxml" if build.with? "lxml"

examples.py https://github.com/towerjoo/django-test-extensions.git | Python | 112 lines
    103:     def test_using_beautiful_soup(self):
    104:         "Example test for content on a given view, this time using the BeautifulSoup parser"
    105:         response = self.client.get('/example/')
    106:         soup = BeautifulSoup(response.content)
    107:         self.assert_equal("Page Title", soup.find("title").string.strip())

feed.py https://bitbucket.org/milos07p/pypsd-nao-on-git.git | Python | 194 lines
    6: from BaseHTTPServer import BaseHTTPRequestHandler
    7: from BeautifulSoup import BeautifulSoup
    8: from decimal import Decimal
    92:     def get_soup(self):
    93:         return BeautifulSoup(self._html, convertEntities=BeautifulSoup.HTML_ENTITIES)
    94:

mangafox.py https://bitbucket.org/antoinealb/mangafox.py.git | Python | 150 lines
    8: import requests
    9: from bs4 import BeautifulSoup
    10: import os
    34:     """
    35:     Returns a BeautifulSoup instance made with the HTML of the page at url.
    36:     """
    37:     page = requests.get(url)
    38:     return BeautifulSoup(page.text)
    39:

lxml.html.ElementSoup-module.html https://github.com/jcrobak/hue.git | HTML | 278 lines
    64: <h1 class="epydoc">Module ElementSoup</h1><p class="nomargin-top"><span class="codelink"><a href="lxml.html.ElementSoup-pysrc.html">source&nbsp;code</a></span></p>
    65: Legacy interface to the BeautifulSoup HTML parser.
    66:
    91:         <span class="summary-sig-arg">makeelement</span>=<span class="summary-sig-default">None</span>)</span><br />
    92:       Convert a BeautifulSoup tree to a list of Element trees.</td>
    93:           <td align="right" valign="top">
    108:           <td><span class="summary-sig"><a name="parse"></a><span class="summary-sig-name">parse</span>(<span class="summary-sig-arg">file</span>,
    109:         <span class="summary-sig-arg">beautifulsoup</span>=<span class="summary-sig-default">None</span>,
    110:         <span class="summary-sig-arg">makeelement</span>=<span class="summary-sig-default">None</span>)</span></td>
    141:     </td><td class="summary">
    142:         <a href="lxml.html.ElementSoup-module.html#__doc__" class="summary-name" onclick="show_private();">__doc__</a> = <code title="&quot;&quot;&quot;Legacy interface to the BeautifulSoup HTML parser.
    143: &quot;&quot;&quot;">&quot;&quot;&quot;Legacy interface to the BeautifulSoup HTML pars<code class="variable-ellipsis">...</code></code>
    179:
    180:   <p>Convert a BeautifulSoup tree to a list of Element trees.</p>
    181: <p>Returns a list instead of a single root Element to support

get-data-2016.ipynb https://bitbucket.org/aabtzu/mlkaggle.git | Jupyter | 410 lines
    25:     "url = \"http://www.fftoday.com/nfl/schedule.php\"\n",
    26:     "soup = bs4.BeautifulSoup(requests.get(url).text)\n",
    27:     "df = pandas.read_html(str(soup.find_all('table')[8]))[0]\n",

bills.py https://github.com/runderwood/openstates.git | Python | 293 lines
    3:
    4: from BeautifulSoup import BeautifulSoup
    5:
    33:             with self.urlopen(index_file) as doc:
    34:                 soup = BeautifulSoup(cleanup_html(doc))
    35:
    123:         with self.urlopen(url) as doc:
    124:             soup = BeautifulSoup(doc)
    125:             date=None

get_manmankan_images.py https://github.com/mitnk/stuff.git | Python | 143 lines
    12:
    13: from BeautifulSoup import BeautifulSoup
    14:
    33:     page = urllib2.urlopen(url)
    34:     soup = BeautifulSoup(page, fromEncoding="gb18030")
    35:     print u"Reading information of %s ..." % soup.findAll("h1")[0].string
    58:     page = urllib2.urlopen(url)
    59:     soup = BeautifulSoup(page)
    60:     javascripts = soup.findAll(text=lambda text: text.parent.name == "script")

Makefile https://gitlab.com/lokiexinferis/vim-configs | Makefile | 82 lines
    78: 	virtualenv build/html2vimdoc
    79: 	build/html2vimdoc/bin/pip install beautifulsoup coloredlogs==4.0 markdown
    80:

tracker.py https://gitlab.com/jan.raddatz/myimmitracker-analyzer | Python | 163 lines
    3: # pip install --proxy proxy:8080 bs4
    4: from bs4 import BeautifulSoup
    5: # pip install --proxy proxy:8080 ansicolors
    80: #	r = requests.get(hostname + url_to_scrape, proxies=proxyDict)
    81: 	soup = BeautifulSoup(r.text, 'html.parser')
    82: 	all_tables = soup.find_all('table')

module-tree.html https://github.com/jcrobak/hue.git | HTML | 170 lines
    73:     <ul>
    74:     <li> <strong class="uidlink"><a href="lxml.html.ElementSoup-module.html">lxml.html.ElementSoup</a></strong>: <em class="summary">Legacy interface to the BeautifulSoup HTML parser.</em>    </li>
    75:     <li class="private"> <strong class="uidlink">lxml.html._dictmixin</strong>    </li>
    82:     <li> <strong class="uidlink"><a href="lxml.html.html5parser-module.html">lxml.html.html5parser</a></strong>: <em class="summary">An interface to html5lib.</em>    </li>
    83:     <li> <strong class="uidlink"><a href="lxml.html.soupparser-module.html">lxml.html.soupparser</a></strong>: <em class="summary">External interface to the BeautifulSoup HTML parser.</em>    </li>
    84:     <li> <strong class="uidlink"><a href="lxml.html.usedoctest-module.html">lxml.html.usedoctest</a></strong>: <em class="summary">Doctest module for HTML comparison.</em>    </li>

Makefile https://github.com/freebsd/freebsd-ports.git | Makefile | 90 lines
    49: EXCEL_DESC=	MS Excel I/O Add-ons
    50: HTML5LIB_DESC=	Parse HTML with www/py-html5lib and www/py-beautifulsoup
    51: HTML_DESC=	HTML Parsing/Generation Add-ons
    52: JINJA2_DESC=	Support conditional HTML formatting with devel/py-Jinja2
    53: LXML_DESC=	Parse HTML with devel/py-lxml and www/py-beautifulsoup
    54: MPL_DESC=	Support graphical plotting output via math/py-matplotlib
    72: BTLNCK_RUN_DEPENDS=	${PYTHON_PKGNAMEPREFIX}bottleneck>=1.2.0:math/py-bottleneck@${PY_FLAVOR}
    73: HTML5LIB_RUN_DEPENDS=	${PYTHON_PKGNAMEPREFIX}beautifulsoup>=4.2.1:www/py-beautifulsoup@${PY_FLAVOR} \
    74: 			${PYTHON_PKGNAMEPREFIX}html5lib>0:www/py-html5lib@${PY_FLAVOR}
    75: JINJA2_RUN_DEPENDS=	${PYTHON_PKGNAMEPREFIX}Jinja2>0:devel/py-Jinja2@${PY_FLAVOR}
    76: LXML_RUN_DEPENDS=	${PYTHON_PKGNAMEPREFIX}beautifulsoup>=4.2.1:www/py-beautifulsoup@${PY_FLAVOR} \
    77: 			${PYTHON_PKGNAMEPREFIX}lxml>0:devel/py-lxml@${PY_FLAVOR}

Parser.py https://gitlab.com/Fremis/IRCourse | Python | 276 lines
    7:
    8: from bs4 import BeautifulSoup, NavigableString
    9: import gc
    74:             try:
    75:                 soup = BeautifulSoup(open_file, 'lxml')
    76:             except UnicodeDecodeError:

ParseHtmlfromFile.py https://github.com/PuercoPop/EleccionesPeru.git | Python | 85 lines
    3:
    4: from BeautifulSoup import BeautifulSoup
    5: import pdb
    16:
    17: soup = BeautifulSoup( f_handle )
    18: a = soup.findAll('tr',height="40")
    23: for item in soup.findAll('tr'):
    24:   for item2 in BeautifulSoup(str(item)).findAll('span',{'class':'arial_contenido_negrita'}):
    25:     if T_Flag == True:
    36:
    37:   for item2 in BeautifulSoup(str(item)).findAll('span',{'class':'arial_contenido'}):
    38:     if T_Flag == True:
    79:     #print item.contents
    80: #b = BeautifulSoup.BeautifulSoup(str(a))
    81: #c = BeautifulSoup.BeautifulSoup( str( b.find('td',align="left" ) ) )

testFunctional.py https://github.com/bogtan/Naaya.git | Python | 221 lines
    22: from copy import deepcopy
    23: from BeautifulSoup import BeautifulSoup
    24:
    153:         html = self.browser.get_html()
    154:         soup = BeautifulSoup(html)
    155:

primewire.py https://bitbucket.org/Leia18/gmc.git | Python | 207 lines
    5:
    6: from BeautifulSoup import BeautifulSoup
    7: from universalscrapers import proxy
    28:         try:
    29:             html = BeautifulSoup(self.get_html(title, self.moviesearch_link))
    30:             index_items = html.findAll('div', attrs={'class': 'index_item index_item_ie'})
    68:         try:
    69:             html = BeautifulSoup(self.get_html(title, self.tvsearch_link))
    70:             index_items = html.findAll('div', attrs={'class': re.compile('index_item.+?')})
    89:                             show_url = urlparse.urljoin(self.base_link, href)
    90:                             html = BeautifulSoup(proxy.get(show_url, 'tv_episode_item'))
    91:
    123:         html = proxy.get(url, 'searchform')
    124:         parsed_html = BeautifulSoup(html)
    125:         key = parsed_html.findAll('input', attrs={'name': 'key'})[0]["value"]

citotron.py https://gitlab.com/maxigas/citotron.git | Python | 265 lines
    8: from args import args
    9: from bs4 import BeautifulSoup as bs
    10: from collections import Counter as counter

views.py https://github.com/dotKom/studlan.git | Python | 310 lines
    4:
    5: from bs4 import BeautifulSoup
    6:
    77:
    78:     dom = BeautifulSoup(seating.layout.template, 'html.parser')
    79:     seat_counter = 0

threading_url.py https://bitbucket.org/cheng123/mytools.git | Python | 114 lines
    3: import time,math,os,re,urllib,urllib2,cookielib
    4: from BeautifulSoup import BeautifulSoup
    5: import time

crawler.py https://bitbucket.org/Meister17/wiki-posting-list.git | Python | 120 lines
    6: import requests
    7: import BeautifulSoup as BS
    8: import nltk
    46:     links = []
    47:     soup = BS.BeautifulSoup(request.text)
    48:     content = nltk.clean_html(request.text.encode('utf8'))

HelpIndex.py https://github.com/esitarski/CrossMgr.git | Python | 98 lines
    9: import re
    10: from bs4 import BeautifulSoup
    11:
    45:     for f in glob.iglob( os.path.join(htmlDocDir, '*.html') ):
    46:         doc = BeautifulSoup( open(f).read(), 'html.parser' )
    47:         div = doc.find('div', class_='content')

__init__.py https://github.com/theduke/sehistory.git | Python | 238 lines
    2:
    3: from libraries.BeautifulSoup import BeautifulSoup
    4:
    152:     def extractLogo(self, html):
    153:         soup = BeautifulSoup(html)
    154:

__init__.py https://github.com/junalmeida/Sick-Beard.git | Python | 355 lines
    3: "The Screen-Scraper's Friend"
    4: http://www.crummy.com/software/BeautifulSoup/
    5:
    15: documentation:
    16: http://www.crummy.com/software/BeautifulSoup/bs4/doc/
    17: """
    23:
    24: __all__ = ['BeautifulSoup']
    25:
    48:
    49: class BeautifulSoup(Tag):
    50:     """
    91:                 "BS4 does not respect the convertEntities argument to the "
    92:                 "BeautifulSoup constructor. Entities are always converted "
    93:                 "to Unicode characters.")

soup.py https://github.com/mozilla/affiliates-lib.git | Python | 228 lines
    2:
    3: warnings.warn("BeautifulSoup 3.x (as of 3.1) is not fully compatible with html5lib and support will be removed in the future", DeprecationWarning)
    4:
    5: from BeautifulSoup import BeautifulSoup, Tag, NavigableString, Comment, Declaration
    6:
    140:         if namespaceHTMLElements:
    141:             warnings.warn("BeautifulSoup cannot represent elements in any namespace", DataLossWarning)
    142:         _base.TreeBuilder.__init__(self, namespaceHTMLElements)
    144:     def documentClass(self):
    145:         self.soup = BeautifulSoup("")
    146:         return Element(self.soup, self.soup, None)
    162:         if namespace is not None:
    163:             warnings.warn("BeautifulSoup cannot represent elements in any namespace", DataLossWarning)
    164:         return Element(Tag(self.soup, name), self.soup, namespace)

generate_featured_pages.py https://github.com/greeness/trendingtopics.git | Python | 153 lines
    13: import urllib2
    14: from BeautifulSoup import BeautifulSoup
    15: import datetime
    65:   page = opener.open( url ).read()
    66:   soup = BeautifulSoup(page)
    67:   return soup

filter.py https://gitlab.com/cobhuni/hadith_alislam_extractor | Python | 197 lines
    31: from argparse import ArgumentParser
    32: from bs4 import BeautifulSoup
    33: import multiprocessing as mp
    68:     Args:
    69:         t (class 'bs4.BeautifulSoup'): html to parse
    70:         fn (str): filename, to trace errors.
    72:     Returns:
    73:         class 'bs4.BeautifulSoup': cell containing the text
    74:
    117:     with open(os.path.join(args.input_dir, fname)) as inf:
    118:         soup = BeautifulSoup(inf.read(), 'lxml')
    119:

_html5lib.py https://bitbucket.org/apyhtri/irc-bot1.git | Python | 222 lines
    82:     def fragmentClass(self):
    83:         self.soup = BeautifulSoup("")
    84:         self.soup.name = "[document_fragment]"

wiki_semantic.py https://bitbucket.org/shishirk/geopy.git | Python | 108 lines
    7: try:
    8:     from BeautifulSoup import BeautifulSoup
    9: except ImportError:
    10:     util.logger.warn("BeautifulSoup was not found. " \
    11:           "The SemanticMediaWiki geocoder will not work.")
    31:         """Parse the URL of the RDF link from the <head> of ``page``."""
    32:         soup = BeautifulSoup(page)
    33:         link = soup.head.find('link', rel='alternate', type=mime_type)
    81:         page = urlopen(url)
    82:         soup = BeautifulSoup(page)
    83:

PYopLib.py https://bitbucket.org/y0no/pyopmail.git | Python | 78 lines
    4: from os.path import join
    5: from bs4 import BeautifulSoup as bs4
    6: from bs4 import Comment

requirements_txt_linker_spec.rb https://gitlab.com/YarNayar/gitlab-ce | Ruby | 95 lines
    28:         nose-cov
    29:         beautifulsoup4
    30:         #
    70:       expect(subject).to include(link('nose-cov', 'https://pypi.python.org/pypi/nose-cov'))
    71:       expect(subject).to include(link('beautifulsoup4', 'https://pypi.python.org/pypi/beautifulsoup4'))
    72:       expect(subject).to include(link('docopt', 'https://pypi.python.org/pypi/docopt'))

ieo.py https://gitlab.com/rithvikvibhu/batch-sof | Python | 71 lines
    3: import pprint
    4: from bs4 import BeautifulSoup
    5:
    47:
    48:     soup = BeautifulSoup(r.text, "html5lib")  # Soup up html
    49:     table_data = [[cell.text for cell in row("td")]
    50:         for row in BeautifulSoup(r.text, "html5lib")("tr")]
    51:

prototype_to_cix.py https://gitlab.com/Smileyt/KomodoEdit | Python | 295 lines
    48: Requirements:
    49:   * BeautifulSoup   (http://www.crummy.com/software/BeautifulSoup/)
    50:   * cElementTree    (http://effbot.org/downloads/#cElementTree)
    65:
    66: from BeautifulSoup import BeautifulSoup, NavigableString
    67:
    260:     data = getPrototypeDocsFromWebpage()
    261:     soup = BeautifulSoup(data)
    262:     cix_root = createCixRoot(name="Prototype", description="JavaScript framework for web development")

legacy.py https://github.com/jlongman/xbmc-hockeystreams-plugin.git | Python | 174 lines
    3:
    4: from BeautifulSoup import BeautifulSoup
    5: import xbmcplugin, xbmcaddon, xbmcgui

settings.py https://github.com/knabar/fynbos.git | Python | 64 lines
    50: # the backend to use when parsing the JavaScript or Stylesheet files
    51: PARSER = getattr(settings, 'COMPRESS_PARSER', 'compressor.parser.BeautifulSoupParser')
    52:

test_archives.py https://gitlab.com/Acidburn0zzz/hyperkitty | Python | 341 lines
    33: from mock import Mock
    34: from bs4 import BeautifulSoup
    35: from django.contrib.auth.models import User
    317:         """
    318:         soup = BeautifulSoup(html, "html.parser")
    319:         months_list = soup.find(id="months-list")

run.py https://bitbucket.org/skywalking/loginparttimesystem.git | Python | 118 lines
    4: import cookielib, optparse, setting, urllib, urllib2, sys
    5: from BeautifulSoup import BeautifulSoup
    6: from datetime import datetime
    28:   info = ()
    29:   bs = BeautifulSoup(content).findAll('tr')[project]
    30:   v = bs.findAll('td')
    35: def parse_signout(content):
    36:   bs = BeautifulSoup(content).find('div', {'id': 'body'})
    37:   if bs.text == '您沒有簽到記錄,無法進行簽退 ....':
    40:     info = ()
    41:     v = BeautifulSoup(content).findAll('td')
    42:     k = BeautifulSoup(content).find('input', {'name': 'signout'})

toc.py https://gitlab.com/janninematt/janninematt | Python | 145 lines
    12:
    13: from bs4 import BeautifulSoup, Comment
    14:
    119:     tree = node = HtmlTreeNode(None, title, 'h0', '')
    120:     soup = BeautifulSoup(content._content, 'html.parser')
    121:     settoc = False
    137:         tree_string = '{}'.format(tree)
    138:         tree_soup = BeautifulSoup(tree_string, 'html.parser')
    139:         content.toc = tree_soup.decode(formatter='html')

get_legislation.py https://github.com/BRIMIL01/fiftystates.git | Python | 221 lines
    15:     soup_parser = html5lib.HTMLParser(
    16:         tree=html5lib.treebuilders.getTreeBuilder('beautifulsoup')).parse
    17:

build.py https://gitlab.com/imbest91/grapejuice | Python | 537 lines
    16: import yaml
    17: from bs4 import BeautifulSoup
    18: from jinja2 import Environment, FileSystemLoader, select_autoescape
    261:
    262:         md_soup = BeautifulSoup(rendered_markdown, "lxml")
    263:         summarizer = Summarizer(break_pads=["[summary-snip]"])
    357:
    358:     soup = BeautifulSoup(content, "html5lib")
    359:

default.py https://gitlab.com/billyprice1/husham.com | Python | 280 lines
    10: import requests
    11: from BeautifulSoup import BeautifulSoup as bs
    12: from utils.webutils import *

main.py https://gitlab.com/smidaharoun/devoirTunisiePython | Python | 197 lines
    3:
    4: from bs4 import BeautifulSoup
    5: from flask import Flask, jsonify
    13: page = urllib2.urlopen(main)
    14: soup = BeautifulSoup(page, 'html.parser')
    15: soup.prettify()
    68:     page_level = urllib2.urlopen(url_level)
    69:     soup_level = BeautifulSoup(page_level, 'html.parser')
    70:     soup_level.prettify()
    100:     page_level = urllib2.urlopen(url_level)
    101:     soup_level = BeautifulSoup(page_level, 'html.parser')
    102:     soup_level.prettify()
    132:     page_level = urllib2.urlopen(url_level)
    133:     soup_level = BeautifulSoup(page_level, 'html.parser')
    134:     soup_level.prettify()

selectors.rst https://github.com/noplay/scrapy.git | ReStructuredText | 380 lines
    10:
    11:  * `BeautifulSoup`_ is a very popular screen scraping library among Python
    12:    programmers which constructs a Python object based on the
    36:
    37: .. _BeautifulSoup: http://www.crummy.com/software/BeautifulSoup/
    38: .. _lxml: http://codespeak.net/lxml/

editor.py https://gitlab.com/dannywillems/geeknote | Python | 259 lines
    5: import tempfile
    6: from bs4 import BeautifulSoup, NavigableString
    7: import threading
    56:
    57:         # soup.select cant be used with dashes: https://bugs.launchpad.net/beautifulsoup/+bug/1276211
    58:         for todo in soup.find_all('en-todo'):
    78:     def ENMLtoText(contentENML):
    79:         soup = BeautifulSoup(contentENML.decode('utf-8'))
    80:
    115:         '''
    116:         Transforms github style checklists `* [ ]` in the BeautifulSoup tree to
    117:         enml.
    172:
    173:               soup = BeautifulSoup(contentHTML, 'html.parser')
    174:               Editor.checklistInSoupToENML(soup)

testFunctional.py https://github.com/eaudeweb/Naaya.git | Python | 151 lines
    1: import re
    2: from BeautifulSoup import BeautifulSoup
    3:
    141:         html = self.browser.get_html()
    142:         soup = BeautifulSoup(html)
    143:

build.py https://code.google.com/p/python-for-android/ | Python | 204 lines
    105: print 'Installing BeautifulSoup.'
    106: beautifulsoup_path = os.path.join(pwd, 'python-libs', 'BeautifulSoup')
    107: compileall.compile_dir(beautifulsoup_path)
    108: shutil.copy(os.path.join(beautifulsoup_path, 'BeautifulSoup.pyc'),
    109:             'output/usr/lib/python2.6/BeautifulSoup.pyc')

README.rst https://github.com/liberation/django_compressor.git | ReStructuredText | 71 lines
    36: is done using lxml_ or if it's not available Python's built-in HTMLParser by
    37: default. As an alternative Django Compressor provides a BeautifulSoup_ and a
    38: html5lib_ based parser, as well as an abstract base class that makes it easy to
    58:
    59: .. _BeautifulSoup: http://www.crummy.com/software/BeautifulSoup/
    60: .. _lxml: http://lxml.de/

PROJECT_IDEAS.md https://gitlab.com/ini-python-lab-course/ss16 | Markdown | 126 lines
    49: * [import.io](https://import.io/): Service that extracts data from websites
    50: * [BeautifulSoup](http://www.crummy.com/software/BeautifulSoup/): Convenient access to content of a downloaded website
    51: * [Scrapy](http://scrapy.org/): Framework for scraping websites

utils.py https://github.com/smetsjp/erp5.git | Python | 236 lines
    211:     def parse_declaration(self, i):
    212:         """Fix handling of CDATA sections. Code borrowed from BeautifulSoup.
    213:         """

single.py https://gitlab.com/skororu/pysnippets | Python | 67 lines
    8:
    9: import bs4                # BeautifulSoup
    10: import requests           # codes.ok, get
    29:         req = requests.get(url)
    30:         page = bs4.BeautifulSoup(req.text, 'lxml')
    31:
    45:     with requests.get(base_url) as req:
    46:         page = bs4.BeautifulSoup(req.text, 'lxml')
    47:         num_previous = page.find('a', rel='prev')['href']

independent.py https://gitlab.com/harrigan/TPP | Python | 36 lines
    5: from crimespider.items import CrimeItem
    6: from bs4 import BeautifulSoup
    7:
    30:             article += c.extract()
    31:         s = BeautifulSoup(article, 'lxml')
    32:         print( s.get_text() )

flickr.py https://github.com/Br3nda/creepy.git | Python | 204 lines
    24: import re
    25: from BeautifulSoup import BeautifulSoup as bs
    26:
    50:         '''
    51:         Removing some javascript that choked BeautifulSoup's parser
    52:         '''

geoserver.py https://github.com/nicopresto/webSkapes.git | Python | 104 lines
    2: from urllib2 import urlopen
    3: from BeautifulSoup import BeautifulStoneSoup
    4:

urlnorm.py https://github.com/e1ven/Lonava.git | Python | 256 lines
    28: import urllib2
    29: from BeautifulSoup import BeautifulSoup
    30: import socket
    69:         try:
    70:             soup = BeautifulSoup(html)
    71:             links = soup.findAll('link')

urls.html https://github.com/msparks/pyhole.git | HTML | 155 lines
    64:
    65: <span class="kn">from</span> <span class="nn">BeautifulSoup</span> <span class="kn">import</span> <span class="n">BeautifulSoup</span>
    66:
    105:
    106:         <span class="n">soup</span> <span class="o">=</span> <span class="n">BeautifulSoup</span><span class="p">(</span><span class="n">response</span><span class="o">.</span><span class="n">read</span><span class="p">())</span>
    107:

scrape-stock-index.py https://bitbucket.org/pombredanne/stock-index-scraper.git | Python | 238 lines
    22: from collections import namedtuple
    23: from BeautifulSoup import BeautifulSoup
    24:
    115:     """
    116:     Creates the BeautifulSoup instance from the given html, locates the main
    117:     table, then processes each row as an individual stock entry.
    118:     """
    119:     soup = BeautifulSoup(html)
    120:     table = find_main_table(soup)
    128:     Attempts to find the stock table in the page html, returns None or
    129:     a BeautifulSoup instance for the table.
    130:     """
    134:     """
    135:     Takes a list of table rows (tr) as BeautifulSoup instances where one row
    136:     contains the data for one stock entry. Uses helper functions to extract

version_check.py https://gitlab.com/mimizone/kolla | Python | 126 lines
    19:
    20: from bs4 import BeautifulSoup as bs
    21: from oslo_config import cfg

bugzilla_unittest.py https://github.com/weissms/owb-mirror.git | Python | 296 lines
    34:
    35: from modules.BeautifulSoup import BeautifulSoup
    36:
    204:         bugzilla = Bugzilla()
    205:         soup = BeautifulSoup(self._example_attachment)
    206:         attachment_element = soup.find("attachment")

post.py https://github.com/langner/mmqc.git | Python | 342 lines
    22: import logging
    23: import BeautifulSoup
    24:
    147:              """Retrieve excerpt from article"""
    148:              s = BeautifulSoup.BeautifulSoup(self.content)
    149:              # get rid of javascript, noscript and css

helper.py https://github.com/macdylan/LBForum.git | Python | 44 lines
    2: # -*- coding: UTF-8 -*-
    3: from BeautifulSoup import BeautifulSoup, NavigableString
    4: from django.conf import settings
    31: def clean_html( fragment ):
    32:     soup = BeautifulSoup( fragment.strip() )
    33:     def cleanup( soup ):

README.rst https://gitlab.com/gallaecio/chakraversiontracker | ReStructuredText | 245 lines
    18:
    19: - `beautifulsoup4 <https://www.crummy.com/software/BeautifulSoup/bs4/doc/>`_
    20:

readability.py https://gitlab.com/zouxc/cola | Python | 368 lines
    31: except ImportError:
    32:     raise DependencyNotInstalledError("BeautifulSoup4")
    33:

ensembl_remote_rest.py https://github.com/kdaily/bcbb.git | Python | 226 lines
    17:
    18: from BeautifulSoup import BeautifulSoup
    19: from Bio import SeqIO
    110:                 organism, gene_id, tx_id) as in_handle:
    111:             soup = BeautifulSoup(in_handle)
    112:             stats_possibilities = soup.findAll("dl", "summary")
    128:                 gene_id, tx_id) as in_handle:
    129:             soup = BeautifulSoup(in_handle)
    130:             domain_table = soup.find("table", "ss autocenter")
    157:                 gene_id) as in_handle:
    158:             soup = BeautifulSoup(in_handle)
    159:             tx_info = soup.find("table", {"id" : "transcripts"})
    181:                 organism, gene_id) as in_handle:
    182:             soup = BeautifulSoup(in_handle)
    183:             orth_table = soup.find("table", "orthologues")

faq.rst https://gitlab.com/oytunistrator/scrapy | ReStructuredText | 286 lines
    5:
    6: How does Scrapy compare to BeautifulSoup or lxml?
    7: -------------------------------------------------
    8:
    9: `BeautifulSoup`_ and `lxml`_ are libraries for parsing HTML and XML. Scrapy is
    10: an application framework for writing web spiders that crawl web sites and
    13: Scrapy provides a built-in mechanism for extracting data (called
    14: :ref:`selectors <topics-selectors>`) but you can easily use `BeautifulSoup`_
    15: (or `lxml`_) instead, if you feel more comfortable working with them. After
    18:
    19: In other words, comparing `BeautifulSoup`_ (or `lxml`_) to Scrapy is like
    20: comparing `jinja2`_ to `Django`_.
    21:
    22: .. _BeautifulSoup: http://www.crummy.com/software/BeautifulSoup/
    23: .. _lxml: http://lxml.de/

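As the Scrapy FAQ snippet above says, BeautifulSoup can be used in place of Scrapy's selectors. A minimal sketch of a spider doing that (the spider name and URL are placeholders, not taken from any result above):

    import scrapy
    from bs4 import BeautifulSoup

    class ExampleSpider(scrapy.Spider):
        name = "example"
        start_urls = ["https://example.com/"]  # placeholder target

        def parse(self, response):
            # Hand the raw HTML to BeautifulSoup instead of using
            # response.css() / response.xpath().
            soup = BeautifulSoup(response.text, "html.parser")
            for link in soup.find_all("a"):
                yield {"href": link.get("href"), "text": link.get_text(strip=True)}
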
dork.py https://bitbucket.org/badc0re/xsser_gsoc.git | Python | 273 lines
    26: urllib2.socket.setdefaulttimeout(5.0)
    27: from BeautifulSoup import BeautifulSoup
    28:
    230:             try:
    231:                 soup = BeautifulSoup(html_data, fromEncoding=encoding)
    232:             except Exception, e:

ba.py https://github.com/barttenbrinke/Bartsidee-Repository.git | Python | 328 lines
    13: import cPickle as pickle
    14: from beautifulsoup.BeautifulSoup import BeautifulSoup
    15:
    125:     data = FetchUrl(samiurl, 0)
    126:     soup = BeautifulSoup(data, convertEntities="xml", smartQuotesTo="xml")
    127:     i = 1
    164:     data = FetchUrl(path)
    165:     soup = BeautifulSoup(data, convertEntities="xml", smartQuotesTo="xml")
    166:     i = 1

ultimate.py https://gitlab.com/eientei95/crunchy-xml-decoder | Python | 351 lines
    18: import altfuncs
    19: from bs4 import BeautifulSoup
    20: from crunchyDec import CrunchyDec
    231:     media_id = page_url[-6:]
    232:     xmlconfig = BeautifulSoup(altfuncs.getxml('RpcApiVideoPlayer_GetStandardConfig', media_id), 'xml')
    233:
    249:         media_id = xmlconfig.find('media_id').string
    250:         xmlconfig = BeautifulSoup(altfuncs.getxml('RpcApiVideoEncode_GetStreamInfo', media_id), 'xml')
    251:         host = xmlconfig.find('host').string

adapter_twcslibrarynet.py https://code.google.com/p/fanficdownloader/ | Python | 273 lines
    25:
    26: from .. import BeautifulSoup as bs
    27: from ..htmlcleanup import stripHTML
    136:
    137:         # use BeautifulSoup HTML parser to make everything easier to find.
    138:         soup = bs.BeautifulSoup(data)
    227:
    228:             # use BeautifulSoup HTML parser to make everything easier to find.
    229:             seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url))
    252:
    253:         chapter=bs.BeautifulSoup('<div class="story"></div>')
    254:
    255:         soup = bs.BeautifulSoup(data)
    256:

models.py https://github.com/agiliq/Dinette.git | Python | 375 lines
    10: import hashlib
    11: from BeautifulSoup import BeautifulSoup
    12: import datetime
    260:     def htmlfrombbcode(self):
    261:         soup = BeautifulSoup(self.message.raw)
    262:         # remove all html tags from the message

yes24_script.py https://github.com/jangxyz/yes24.git | Python | 193 lines
    4: import urllib, urllib2, cookielib
    5: from BeautifulSoup import BeautifulSoup
    6: from datetime import datetime
    77:     # parse
    78:     soup = BeautifulSoup(text)
    79:     order_list_table     = soup.table(id="MyOrderListTbl")[0]
    125:     # parse
    126:     soup = BeautifulSoup(text)
    127:     order_price = soup.find(id="CLbTotOrdAmt").b.string
    129:     text = '<table>' + text[text[1:].find('<')+1:-7] + '</table>'
    130:     soup = BeautifulSoup(text)
    131:     point_saved = soup.find(attrs={'class':"price"}).b.string

readme.md https://github.com/atomia/atomia-nagios-plugins.git | Markdown | 343 lines
    30: * **WWW::Mechanize** (on ubuntu, just `apt-get install libwww-mechanize-perl`)
    31: * **BeautifulSoup4** (ubuntu: `apt-get install python-bs4 python3-bs4`)
    32:

zad_7.py https://gitlab.com/mmeisel/LV | Python | 43 lines
    8: import urllib
    9: from bs4 import BeautifulSoup
    10:
    26: html=urllib.urlopen(urlAddr, "lxml").read()  # open the url
    27: soup=BeautifulSoup(html)                     # and declare a BeautifulSoup object
    28:

scraping-the-web.rst https://github.com/toastdriven/pydanny-event-notes.git | ReStructuredText | 109 lines
    44:
    45:  * BeautifulSoup is old and not maintained anymore
    46:  * html5lib
    47:     - builds BeautifulSoup objects
    48:     - builds elementTrees

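The note above about BeautifulSoup (3.x) being unmaintained is why many of the older results in this listing import BeautifulSoup rather than bs4. A sketch of the usual migration, for comparison:

    # BeautifulSoup 3 (unmaintained, Python 2 only):
    #     from BeautifulSoup import BeautifulSoup
    #     soup = BeautifulSoup(html)                  # parser chosen implicitly
    # BeautifulSoup 4 equivalent:
    from bs4 import BeautifulSoup
    soup = BeautifulSoup("<p>hello</p>", "html.parser")  # parser named explicitly
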
_lxml.py https://bitbucket.org/bendikro/deluge-yarss-plugin.git | Python | 296 lines
                    
60    def initialize_soup(self, soup):
                    
61        """Let the BeautifulSoup object know about the standard namespace
                    
62        mapping.
                    
67    def _register_namespaces(self, mapping):
                    
68        """Let the BeautifulSoup object know about namespaces encountered
                    
69        while parsing the document.
                    
74            if key and key not in self.soup._namespaces:
                    
75                # Let the BeautifulSoup object know about a new namespace.
                    
76                # If there are multiple namespaces defined with the same
                    
189
                    
190            # First, Let the BeautifulSoup object know about it.
                    
191            self._register_namespaces(nsmap)
                    
                
__init__.py https://bitbucket.org/yourcelf/old-intertwinkles.git | Python | 163 lines
                    
19from django_browserid import get_audience
                    
20from bs4 import BeautifulSoup
                    
21
                    
93        self.assertTrue("test@example.com" in res.content)
                    
94        soup = BeautifulSoup(res.content)
                    
95        self.assertEquals(soup.find(id="id_email").get("value"),
                    
104        self.assertFalse("This address is unconfirmed" in res.content)
                    
105        soup = BeautifulSoup(res.content)
                    
106        self.assertEquals(soup.find(id="id_email").get("value"),
                    
                
nrlbot.py https://gitlab.com/g.davis13/nrlbot | Python | 344 lines
                    
35
                    
36from bs4 import BeautifulSoup
                    
37from collections import namedtuple
                    
100    r = requests.get(url)
                    
101    soup = BeautifulSoup(r.text, 'html.parser')
                    
102    return soup
                    
                
index.html https://github.com/larsks/blog.oddbit.com.git | HTML | 240 lines
                    
6        <title>Recent answers on StackOverflow &middot; The Odd Bit</title>
                    
7        <meta name="description" content="Traefik different ports for different Docker containers docker docker-compose traefik  git push can not find -o option git  Interact with podman docker via socket in Redhat 9 docker redhat podman  Capturing commented text in an XML python xml beautifulsoup xml-comments  How to execute a shell script as input on an interactive bash pod in Kubernetes? bash shell kubernetes  Docker : Opensearch refuses connection with the example in opensearch documentation in docker python-3.">
                    
8        <meta name="HandheldFriendly" content="True">
                    
142<li><p><a class="sx-answer sx-answer-accepted" href="https://stackoverflow.com/questions/72681436/capturing-commented-text-in-an-xml/72681822#72681822">Capturing commented text in an XML</a>
                    
143  <a class="sx-tag" href="https://stackoverflow.com/questions/tagged/python">python</a> <a class="sx-tag" href="https://stackoverflow.com/questions/tagged/xml">xml</a> <a class="sx-tag" href="https://stackoverflow.com/questions/tagged/beautifulsoup">beautifulsoup</a> <a class="sx-tag" href="https://stackoverflow.com/questions/tagged/xml-comments">xml-comments</a> </p></li>
                    
144
                    
                
conversation.py https://gitlab.com/sanchezfauste/TweetDigraph | Python | 108 lines
                    
1from bs4 import BeautifulSoup
                    
2import requests
                    
95		if req.status_code == 200:
                    
96			html = BeautifulSoup(req.text, 'html.parser')
                    
97			conversations = html.find_all('li', {'class':'ThreadedConversation'})
                    
                
base_fetcher.py https://bitbucket.org/filmaster/filmaster-stable/ | Python | 193 lines
                    
3
                    
4from beautifulsoup import BeautifulSoup
                    
5from optparse import make_option
                    
19
                    
20BeautifulSoup.MARKUP_MASSAGE += [
                    
21    (re.compile(r"<[^>]+>"), lambda tag:quote_re.sub(r"\1 \2", tag.group(0))),
                    
65    def soup(self, data):
                    
66        return BeautifulSoup(data)
                    
67
                    
                
scrape.py https://github.com/sneeu/aliss_scrapers.git | Python | 113 lines
                    
7
                    
8from BeautifulSoup import BeautifulSoup
                    
9
                    
32    html = re.sub('<script.*?>[\s\S]*?</.*?script>', '', html)
                    
33    soup = BeautifulSoup(html)
                    
34
                    
                
podnapisi.py https://github.com/abenea/subliminal.git | Python | 150 lines
                    
87        :return: the response
                    
88        :rtype: :class:`xml.etree.ElementTree.Element` or :class:`bs4.BeautifulSoup`
                    
89
                    
96        else:
                    
97            return bs4.BeautifulSoup(r.content, ['permissive'])
                    
98
                    
                
bills.py https://github.com/runderwood/openstates.git | Python | 138 lines
                    
5import urllib2
                    
6from BeautifulSoup import BeautifulSoup
                    
7
                    
12
                    
13It includes a spurious </HEAD> before the useful data begins and lines like '<option value="Bill"selected="selected">Bill</option>', in which the lack of a space between the attributes confuses BeautifulSoup.
                    
14'''
                    
69                    return False
                    
70                soup = BeautifulSoup(cleansource(data))
                    
71                rows = soup.findAll('table')[1].findAll('tr')[1:]
                    
91        with self.urlopen(histurl) as data:
                    
92            soup = BeautifulSoup(cleansource(data))
                    
93            basicinfo = soup.findAll('div', id='bhistleft')[0]
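
The excerpt never shows cleansource itself; a hypothetical version handling the two problems described above (the spurious </HEAD> and the missing space between attributes) might look like:

    import re
    from BeautifulSoup import BeautifulSoup

    def cleansource(data):
        data = data.replace('</HEAD>', '', 1)            # drop the spurious tag
        # '"selected=' -> '" selected=': restore the space BeautifulSoup expects
        return re.sub(r'"(?=[a-zA-Z-]+=)', '" ', data)

    soup = BeautifulSoup(cleansource('<option value="Bill"selected="selected">Bill</option>'))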
                    
                
__init__.py https://github.com/junalmeida/Sick-Beard.git | Python | 96 lines
                    
40    treeType - the name of the tree type required (case-insensitive). Supported
                    
41               values are "simpletree", "dom", "etree" and "beautifulsoup"
                    
42               
                    
51                          ElementTree, cElementTree and lxml.etree).
                    
52                "beautifulsoup" - Beautiful soup (if installed)
                    
53               
                    
70            treeBuilderCache[treeType] = simpletree.TreeBuilder
                    
71        elif treeType == "beautifulsoup":
                    
72            import soup
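
Seen from the caller's side, this registry is what getTreeBuilder consults; a short sketch with the still-supported tree types:

    import html5lib
    from html5lib import treebuilders

    et_parser = html5lib.HTMLParser(tree=treebuilders.getTreeBuilder("etree"))
    tree = et_parser.parse("<p>Hello</p>")     # ElementTree-based tree
    dom_parser = html5lib.HTMLParser(tree=treebuilders.getTreeBuilder("dom"))
    dom = dom_parser.parse("<p>Hello</p>")     # minidom-based tree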
                    
                
metacritic.py https://bitbucket.org/alex_fish/vgr.git | Python | 273 lines
                    
5from datetime import datetime
                    
6from BeautifulSoup import BeautifulSoup
                    
7from pprint import pprint
                    
70            return None
                    
71        soup = BeautifulSoup(html)
                    
72        i = 0
                    
124            return None
                    
125        soup = BeautifulSoup(html)
                    
126        prod = MetacriticInfo()
                    
                
hyperleech.py https://bitbucket.org/devinjames/hyperleech.git | Python | 407 lines
                    
24    import platform
                    
25    from bs4 import BeautifulSoup
                    
26    import requests
                    
279            loghandle = open(self.logfile, "r")
                    
280            self.soup = BeautifulSoup(loghandle.read())  # , 'xml')  # this contains the original soup, never changes.
                    
281            self.username = self.soup.find('user')
                    
286            print "No log file, creating new"
                    
287            self.xml = BeautifulSoup('<hyperleech>')  # , 'xml')
                    
288            self.soup = self.xml
                    
307    def write(self):
                    
308        if type(self.loghandle) is not file or type(self.soup) is not BeautifulSoup:
                    
309            print "Something with the log handle isn't initialized"
                    
                
lequipe_fr.py https://gitlab.com/edelans/scoragora | Python | 180 lines
                    
55	html = requests.get(url).text
                    
56	soup = BeautifulSoup.BeautifulSoup(html)
                    
57	# Get date
                    
85	html = requests.get(day_url).text
                    
86	soup = BeautifulSoup.BeautifulSoup(html)
                    
87	result = []
                    
119		html = requests.get(url).text
                    
120		soup = BeautifulSoup.BeautifulSoup(html)
                    
121		root = soup.find(attrs={'name': 'IDNIVEAU'})
                    
147		html = requests.get(url).text
                    
148		soup = BeautifulSoup.BeautifulSoup(html)
                    
149		soup.find(id="timeline")
                    
169		html = requests.get(url).text
                    
170		soup = BeautifulSoup.BeautifulSoup(html)
                    
171		hometeam_score = soup.find(id='scoDom')
                    
                
tpb.py https://github.com/SpLord/CouchPotato.git | Python | 160 lines
                    
4from dateutil.parser import parse
                    
5from imdb.parser.http.bsouplxml._bsoup import SoupStrainer, BeautifulSoup
                    
6from urllib import quote_plus
                    
63            tables = SoupStrainer('table')
                    
64            html = BeautifulSoup(data, parseOnlyThese = tables)
                    
65            resultTable = html.find('table', attrs = {'id':'searchResult'})
                    
154        div = SoupStrainer('div')
                    
155        html = BeautifulSoup(data, parseOnlyThese = div)
                    
156        html = html.find('div', attrs = {'class':'nfo'})
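
The same parse-only-what-you-need trick in bs4 spelling, where parseOnlyThese became parse_only (a sketch against plain bs4, not the imdb-bundled _bsoup module):

    from bs4 import BeautifulSoup, SoupStrainer

    html = '<table id="searchResult"><tr><td>x</td></tr></table>'
    only_tables = SoupStrainer('table')          # skip everything but <table> subtrees
    soup = BeautifulSoup(html, 'html.parser', parse_only=only_tables)
    table = soup.find('table', attrs={'id': 'searchResult'})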
                    
                
searchengine.py https://github.com/kzfm1024/misc.git | Python | 306 lines
                    
1import urllib2
                    
2from BeautifulSoup import *
                    
3from urlparse import urljoin
                    
106        try:
                    
107          soup=BeautifulSoup(c.read())
                    
108          self.addtoindex(page,soup)
                    
                
vt_hash2filenames.py https://bitbucket.org/Vnoxygen/malformity.git | Python | 43 lines
                    
3import re
                    
4from BeautifulSoup import BeautifulSoup
                    
5from canari.maltego.utils import debug, progress
                    
                
shorter.py https://bitbucket.org/badc0re/xsser_gsoc.git | Python | 76 lines
                    
27from cStringIO import StringIO
                    
28from BeautifulSoup import BeautifulSoup
                    
29
                    
65
                    
66        soup = BeautifulSoup(out.getvalue())
                    
67        if self._service == 'tinyurl':
                    
                
KitMensaService.py https://gitlab.com/namboy94/messengerbot | Python | 230 lines
                    
26import requests
                    
27from bs4 import BeautifulSoup
                    
28from typing import Tuple
                    
162            html = requests.get(url).text
                    
163            soup = BeautifulSoup(html, "html.parser")
                    
164            resource = soup.select('body')
                    
                
2010-11-21-exploring_art_data_3.md https://gitlab.com/rheaplex/robmyers.org | Markdown | 172 lines
                    
23    <tt>#!/usr/bin/python
                    
24    from BeautifulSoup import BeautifulStoneSoup
                    
25    import re
                    
                
catalogparser.py https://github.com/jeffh/YACS.git | Python | 106 lines
                    
2import re
                    
3from BeautifulSoup import BeautifulSoup
                    
4from rpi_courses.config import DEPARTMENTS
                    
40    course_page = re.sub('<br */?>', '\n', course_page)
                    
41    soup = BeautifulSoup(course_page, convertEntities=BeautifulSoup.HTML_ENTITIES)
                    
42    title_text = soup.findAll('h1 h2 h3 h4 h5 h6'.split(' '))[0].text
                    
                
test_markdown_to_html.py https://gitlab.com/Ivy001/pants | Python | 186 lines
                    
173
                    
174      soup = bs4.BeautifulSoup(markup=html)
                    
175      self.assertIsNotNone(soup.find(text='A good link:'))
                    
                
utils.py https://github.com/Gautier/django-page-cms.git | Python | 139 lines
                    
119        return content
                    
120    from BeautifulSoup import BeautifulSoup
                    
121    tree = BeautifulSoup(content)
                    
                
base.py https://github.com/2dpodcast/jaikuenginepatch.git | Python | 209 lines
                    
17
                    
18from beautifulsoup import BeautifulSoup
                    
19
                    
193    self.assertWellformed(response)
                    
194    parsed = BeautifulSoup.BeautifulSoup(response.content)
                    
195    found = parsed.findAll('a', attrs = { 'class': link_class})
                    
                
whit.py https://bitbucket.org/chef1991/whit.git | Python | 361 lines
                    
1from bs4 import BeautifulSoup               # HTML handling
                    
2from flask import Flask, request, redirect  # Routing
                    
80    #   Ensure the overview is well formed
                    
81    overview = BeautifulSoup(entryText)
                    
82
                    
264    p3 = p2['parse']['text']['*']
                    
265    p4 = BeautifulSoup(p3)
                    
266    p5 = p4.find_all('p')
                    
                
release.py https://gitlab.com/LocutusOfPenguin/python-chess | Python | 178 lines
                    
141        sys.exit(1)
                    
142    soup = bs4.BeautifulSoup(res.text, "html.parser")
                    
143    csrf = soup.find("input", {"name": "CSRFToken"})["value"]
                    
                
util.py https://gitlab.com/Lett1/SlackDuckBot | Python | 151 lines
                    
5import re
                    
6from bs4 import BeautifulSoup
                    
7from urllib.request import Request, urlopen
                    
113    else:
                    
114        soup = BeautifulSoup(html, "lxml")
                    
115        if soup.title is not None:
                    
                
importer.py https://github.com/gregmalcolm/Bookie.git | Python | 201 lines
                    
2from datetime import datetime
                    
3from BeautifulSoup import BeautifulSoup
                    
4from bookie.models import BmarkMgr
                    
78
                    
79        soup = BeautifulSoup(file_io)
                    
80        can_handle = False
                    
90        """Given a file, process it"""
                    
91        soup = BeautifulSoup(self.file_handle)
                    
92
                    
140        """
                    
141        soup = BeautifulSoup(file_io)
                    
142        can_handle = False
                    
157        """
                    
158        soup = BeautifulSoup(self.file_handle)
                    
159        if not soup.contents[0] == "DOCTYPE NETSCAPE-Bookmark-file-1":
                    
                
types.py https://github.com/rxuriguera/bibtexIndexMaker.git | Python | 87 lines
                    
37    self.msg attribute contains explanation why parsing failed
                    
38    self.tag attribute contains BeautifulSoup object with the most relevant tag
                    
39    that failed to parse
                    
                
setup.py https://github.com/eged/django-blog-zinnia.git | Python | 37 lines
                    
33                        'akismet',
                    
34                        'BeautifulSoup',
                    
35                        ])
                    
                
inject.py https://gitlab.com/BoTranVan/MITMf | Python | 195 lines
                    
23
                    
24from bs4 import BeautifulSoup
                    
25from plugins.plugin import Plugin
                    
86    	    if encoding is not None:
                    
87                html = BeautifulSoup(data.decode(encoding, "ignore"), "lxml")
                    
88    	    else:
                    
89                html = BeautifulSoup(data, "lxml")
                    
90
                    
98                if self.html_payload:
                    
99                    payload = BeautifulSoup(self.html_payload, "html.parser")
                    
100                    html.body.append(payload)
                    
104                    with open(self.html_file, 'r') as file:
                    
105                        payload = BeautifulSoup(file.read(), "html.parser")
                    
106                        html.body.append(payload)
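
A minimal, self-contained sketch of the injection pattern above (the payload markup is illustrative):

    from bs4 import BeautifulSoup

    page = BeautifulSoup('<html><body><p>page</p></body></html>', 'lxml')
    payload = BeautifulSoup('<script>alert(1)</script>', 'html.parser')
    page.body.append(payload)   # the parsed payload lands at the end of <body>
    print(page)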
                    
                
tvsubtitles.py https://github.com/junalmeida/Sick-Beard.git | Python | 191 lines
                    
98        :return: the response
                    
99        :rtype: :class:`bs4.BeautifulSoup`
                    
100
                    
104            raise ProviderError('Request failed with status code %d' % r.status_code)
                    
105        return bs4.BeautifulSoup(r.content, ['permissive'])
                    
106
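
The request/parse contract that docstring describes, in outline (the URL and exception type are stand-ins for the provider's own):

    import requests
    import bs4

    r = requests.get('http://example.com/subtitles')
    if r.status_code != 200:
        raise RuntimeError('Request failed with status code %d' % r.status_code)
    soup = bs4.BeautifulSoup(r.content, 'html.parser')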
                    
                
test_microformats.py https://bitbucket.org/inirudebwoy/gdziebylkaziu.git | Python | 155 lines
                    
5try:
                    
6    from BeautifulSoup import BeautifulSoup
                    
7except ImportError:
                    
8    BeautifulSoup = None
                    
9
                    
25    def test_one_soup(self):
                    
26        if BeautifulSoup:
                    
27            locations = self.parser.find_all(BeautifulSoup(self.MARKUP))
                    
31    def test_multi_soup(self):
                    
32        if BeautifulSoup:
                    
33            locations = self.parser.find_all(BeautifulSoup(self.MARKUP * 3))
                    
50    def test_none_soup(self):
                    
51        if BeautifulSoup:
                    
52            locations = self.parser.find_all(BeautifulSoup(self.MARKUP))
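
The guard pattern above generalizes to preferring bs4 and falling back to the older package, keeping the name defined either way (a common variant, not what this repo does):

    try:
        from bs4 import BeautifulSoup                 # prefer bs4
    except ImportError:
        try:
            from BeautifulSoup import BeautifulSoup   # fall back to bs3
        except ImportError:
            BeautifulSoup = None                      # callers can skip on None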
                    
                
sponsoredlinks.py https://bitbucket.org/manaphassan/raspberry-pwn.git | Python | 235 lines
                    
14from htmlentitydefs import name2codepoint
                    
15from BeautifulSoup import BeautifulSoup
                    
16
                    
30    self.msg attribute contains explanation why parsing failed
                    
31    self.tag attribute contains BeautifulSoup object with the most relevant tag that failed to parse
                    
32    Thrown only in debug mode
                    
158
                    
159        return BeautifulSoup(page)
                    
160
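
How calling code might consume the debug-mode exception those docstrings describe (the class body here is a hypothetical stand-in; only the .msg and .tag attributes come from the text):

    class ParsingError(Exception):          # stand-in for the module's own class
        def __init__(self, msg, tag):
            self.msg, self.tag = msg, tag

    try:
        raise ParsingError('unexpected layout', None)   # as thrown in debug mode
    except ParsingError as e:
        print(e.msg)   # explanation of why parsing failed
        print(e.tag)   # the most relevant BeautifulSoup tag (None in this sketch)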
                    
                
share_post.py https://gitlab.com/janninematt/janninematt | Python | 81 lines
                    
8
                    
9from bs4 import BeautifulSoup
                    
10try:
                    
18def article_title(content):
                    
19    main_title = BeautifulSoup(content.title, 'html.parser').get_text().strip()
                    
20    sub_title = ''
                    
21    if hasattr(content, 'subtitle'):
                    
22        sub_title = ' ' + BeautifulSoup(content.subtitle, 'html.parser').get_text().strip()
                    
23    return quote(('%s%s' % (main_title, sub_title)).encode('utf-8'))
                    
31def article_summary(content):
                    
32    return quote(BeautifulSoup(content.summary, 'html.parser').get_text().strip().encode('utf-8'))
                    
33
                    
                
models.py https://github.com/mci/mpatlas.git | Python | 250 lines
                    
13from ckeditor.fields import RichTextField
                    
14from bs4 import BeautifulSoup
                    
15from uuslug import uuslug, slugify
                    
                
cablemodem_check.py https://gitlab.com/mikeos2/Nagios_Plugins | Python | 223 lines
                    
38try:
                    
39    from bs4 import BeautifulSoup
                    
40except ImportError:
                    
41    print "Error: (" + str(Nagios_UNKNOWN) + ") install BeautifulSoup!"
                    
42    sys.exit(Nagios_UNKNOWN)
                    
69
                    
70        return BeautifulSoup(page)
                    
71
                    
                
get_legislation.py https://github.com/gosuri/fiftystates.git | Python | 117 lines
                    
14    state = 'nc'
                    
15    soup_parser = html5lib.HTMLParser(tree=html5lib.treebuilders.getTreeBuilder('beautifulsoup')).parse
                    
16
                    
                
make_chart.py https://github.com/egor83/hn-stuff.git | Python | 181 lines
                    
1import BeautifulSoup
                    
2import logging
                    
111        
                    
112        soup = BeautifulSoup.BeautifulSoup(page)
                    
113
                    
                
__init__.py https://github.com/junalmeida/Sick-Beard.git | Python | 307 lines
                    
75
                    
76# The BeautifulSoup class will take feature lists from developers and use them
                    
77# to look up builders in this registry.
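
That registry is what the public features argument feeds: it accepts either an exact builder name or a list of traits (the list form below resolves to lxml when it is installed):

    from bs4 import BeautifulSoup

    soup_a = BeautifulSoup('<p>x</p>', features='html.parser')     # exact builder name
    soup_b = BeautifulSoup('<p>x</p>', features=['html', 'fast'])  # trait-list lookup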
                    
                
Weather.py https://gitlab.com/leiftomas/jasper-client | Python | 172 lines
                    
43                     'international_cities.asp')
                    
44    soup = bs4.BeautifulSoup(r.text)
                    
45    data = soup.find(id="inner-content").find('pre').string
                    
                
subtitle-downloader.py https://gitlab.com/132nd-etcher/subtitle-downloader | Python | 124 lines
                    
18import requests,time,re,zipfile
                    
19from bs4 import BeautifulSoup
                    
20PY_VERSION = sys.version_info[0]
                    
74        r=requests.get("http://subscene.com/subtitles/release?q="+root);
                    
75        soup=BeautifulSoup(r.content,"lxml")
                    
76        atags=soup.find_all("a")
                    
83            r=requests.get("http://subscene.com"+href);
                    
84            soup=BeautifulSoup(r.content,"lxml")
                    
85            lin=soup.find_all('a',attrs={'id':'downloadButton'})[0].get("href")
                    
86            r=requests.get("http://subscene.com"+lin);
                    
87            soup=BeautifulSoup(r.content,"lxml")
                    
88            subfile=open(root2+".zip", 'wb')
                    
                
baseparser.py https://gitlab.com/andyblaesus/newsdiffs | Python | 156 lines
                    
52# Ick.
                    
53from BeautifulSoup import BeautifulSoup
                    
54def bs_fixed_getText(self, separator=u""):
                    
55    bsmod = sys.modules[BeautifulSoup.__module__]
                    
56    if not len(self.contents):
                    
65    return separator.join(strings)
                    
66sys.modules[BeautifulSoup.__module__].Tag.getText = bs_fixed_getText
                    
67# End fix
                    
111
                    
112    feeder_bs = BeautifulSoup #use this version of beautifulsoup for feed
                    
113
                    
                
fileops.py https://gitlab.com/j000sh/hackerrank-to-git | Python | 136 lines
                    
6from pprint import pprint
                    
7from bs4 import BeautifulSoup
                    
8from sh import git
                    
87    with open(filename, 'w') as f:
                    
88        f.write(BeautifulSoup(html, 'html5lib').prettify() + '\n')
                    
89    gitCommitModel(contest['model'], filename, 'contest created: ' + model['slug'])
                    
105    with open(filename, 'w') as f:
                    
106        f.write(BeautifulSoup(html, "html5lib").prettify() + "\n")
                    
107    gitCommitModel(challenge, filename, 'challenge created: ' + challenge['slug'])
                    
                
plugin.py https://github.com/lbjay/supybot-plugins.git | Python | 47 lines
                    
9
                    
10from BeautifulSoup import BeautifulSoup
                    
11
                    
23
                    
24    soup =  BeautifulSoup(doc)
                    
25    dd = soup.find('dd', 'highlight')
                    
                
wikipediaidevice.py https://github.com/RoDaniel/featurehouse.git | Python | 211 lines
                    
4import re
                    
5from exe.engine.beautifulsoup import BeautifulSoup
                    
6from exe.engine.idevice       import Idevice
                    
73        page = page.replace(u'&#160;', u'&nbsp;')
                    
74        soup = BeautifulSoup(page, False)
                    
75        content = soup.first('div', {'id': "content"})
                    
                
sitegen.py https://gitlab.com/Ivy001/pants | Python | 374 lines
                    
36  import bs4
                    
37  return bs4.BeautifulSoup(*args, **kwargs)
                    
38
                    
70def load_soups(config):
                    
71  """Generate BeautifulSoup AST for each page listed in config."""
                    
72  soups = {}
                    
                
ONPEcrawler.py https://github.com/PuercoPop/EleccionesPeru.git | Python | 169 lines
                    
4from  urllib2 import Request, urlopen
                    
5from BeautifulSoup import BeautifulSoup
                    
6import Elecciones.models as m
                    
54            f = urlopen( req )
                    
55            soup = BeautifulSoup( f.read(),
                    
56                                  convertEntities=BeautifulSoup.HTML_ENTITIES)
                    
68                f = urlopen( req )
                    
69                soup = BeautifulSoup( f.read(),
                    
70                                      convertEntities=BeautifulSoup.HTML_ENTITIES)
                    
79                    f=urlopen(req)
                    
80                    soup = BeautifulSoup( f.read(),
                    
81                                          convertEntities=BeautifulSoup.HTML_ENTITIES)
                    
105                        f = urlopen( req )
                    
106                        soup = BeautifulSoup( f.read(),
                    
107                                            convertEntities=BeautifulSoup.HTML_ENTITIES)
                    
                
download_russian_contrast.py https://bitbucket.org/Meister17/term-extraction.git | Python | 108 lines
                    
2# -*- coding: utf-8 -*-
                    
3from BeautifulSoup import BeautifulSoup
                    
4import optparse
                    
38    html = response.read()
                    
39    soup = BeautifulSoup(html)
                    
40    zero_result = False;
                    
                
__init__.py https://github.com/oesmith/django-css.git | Python | 316 lines
                    
3import subprocess
                    
4from BeautifulSoup import BeautifulSoup
                    
5from tempfile import NamedTemporaryFile
                    
54        self.split_content = []
                    
55        self.soup = BeautifulSoup(self.content)
                    
56        self.xhtml = xhtml
                    
268                    basename = os.path.splitext(os.path.basename(filename))[0]
                    
269                    elem = BeautifulSoup(re.sub(basename+ext,basename+'.css',unicode(elem)))
                    
270                    filename = path + '.css'
                    
                
test_pipreqs.py https://gitlab.com/Kravcenko/pipreqs | Python | 188 lines
                    
23                        'peewee', 'ujson', 'nonexistendmodule', 'bs4', 'after_method_is_valid_even_if_not_pep8' ]
                    
24        self.modules2 = ['beautifulsoup4']
                    
25        self.local = ["docopt", "requests", "nose", 'pyflakes']
                    
                
phew.py https://gitlab.com/fnaticshank/crawler | Python | 327 lines
                    
13import requests
                    
14from bs4 import BeautifulSoup
                    
15
                    
213                        errors="replace")
                    
214                soup = BeautifulSoup(content, "lxml")
                    
215                tags = soup('a')
                    
                
layouttestresults.py https://gitlab.com/x33n/phantomjs | Python | 91 lines
                    
31from webkitpy.common.net.resultsjsonparser import ResultsJSONParser
                    
32from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup, SoupStrainer
                    
33from webkitpy.layout_tests.models import test_results
                    
                
clozeidevice.py https://github.com/RoDaniel/featurehouse.git | Python | 240 lines
                    
150        """
                    
151        takes a BeautifulSoup fragment (i) and bursts its contents to 
                    
152        import this idevice from a CommonCartridge export
                    
                
 
