PageRenderTime 161ms queryTime 29ms sortTime 2ms getByIdsTime 66ms findMatchingLines 27ms

100+ results results for 'beautifulsoup' (161 ms)

Not the results you expected?
registerbot.py https://gitlab.com/Desikan/botebc | Python | 296 lines
                    
11from PIL import ImageDraw
                    
12from bs4 import BeautifulSoup 
                    
13
                    
93		raw_html = self.browser.response().read()
                    
94		soup = BeautifulSoup(raw_html)
                    
95
                    
149		   mechanize object.'''
                    
150		soup = BeautifulSoup(self.response)
                    
151		formcount=0
                    
165		   mechanize object.'''
                    
166		soup = BeautifulSoup(self.response)
                    
167		formcount=0
                    
180		''' Displays all the links from the current browser page.'''
                    
181		soup = BeautifulSoup(raw_html)
                    
182		categories_table_obj = soup.find("table", attrs={"id":'my_table'})
                    
                
google_soup.py https://github.com/chudler/Community-Zenpacks.git | Python | 221 lines
                    
12import logging.handlers
                    
13from BeautifulSoup import BeautifulSoup
                    
14from ClientForm import *
                    
83    def findApplications(self):
                    
84        main_apps = BeautifulSoup(self.resetClient())
                    
85        column_headers = []
                    
87        for table in main_apps.findAll('table', limit=1):
                    
88            # table is a BeautifulSoup.Tag object
                    
89            column_headers = self.extract_headings(table)
                    
148            quota_details = re.sub(quota_fix, '', quota_details)
                    
149            quota_soup = BeautifulSoup(quota_details)
                    
150            quota_section = quota_soup.find(attrs={'id':'ae-quota-details'})
                    
168        app_main = open('/tmp/dashboard.html', 'r').read()
                    
169        app_soup = BeautifulSoup(app_main)
                    
170        load_section = app_soup.find(text=re.compile('Current Load'))
                    
                
testFunctional.py https://github.com/bogtan/Naaya.git | Python | 212 lines
                    
21from unittest import TestSuite, makeSuite
                    
22from BeautifulSoup import BeautifulSoup
                    
23
                    
147        html = self.browser.get_html()
                    
148        soup = BeautifulSoup(html)
                    
149
                    
                
_htmlparser.py https://github.com/yoheia/yoheia.git | Python | 265 lines
                    
53
                    
54class BeautifulSoupHTMLParser(HTMLParser):
                    
55    def handle_starttag(self, name, attrs):
                    
163        args, kwargs = self.parser_args
                    
164        parser = BeautifulSoupHTMLParser(*args, **kwargs)
                    
165        parser.soup = self.soup
                    
169            warnings.warn(RuntimeWarning(
                    
170                "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
                    
171            raise e
                    
198""", re.VERBOSE)
                    
199    BeautifulSoupHTMLParser.locatestarttagend = locatestarttagend
                    
200
                    
261
                    
262    BeautifulSoupHTMLParser.parse_starttag = parse_starttag
                    
263    BeautifulSoupHTMLParser.set_cdata_mode = set_cdata_mode
                    
                
TV3Scrapper.py http://xbmc-vodie.googlecode.com/svn/trunk/ | Python | 254 lines
                    
9import sys
                    
10from BeautifulSoup import SoupStrainer, MinimalSoup as BeautifulSoup, BeautifulStoneSoup
                    
11import urllib, urllib2
                    
                
TVSeriesUtil.py http://xbmc-vodie.googlecode.com/svn/trunk/ | Python | 224 lines
                    
9import sys
                    
10from BeautifulSoup import SoupStrainer, MinimalSoup as BeautifulSoup, BeautifulStoneSoup
                    
11import urllib, urllib2, cookielib
                    
                
main.py https://github.com/gtracy/APODEmail.git | Python | 249 lines
                    
18
                    
19from BeautifulSoup import BeautifulSoup, Tag
                    
20
                    
184
                    
185     soup = BeautifulSoup(result.content)
                    
186     #logging.debug(soup)
                    
                
hackerrankops.py https://gitlab.com/j000sh/hackerrank-to-git | Python | 200 lines
                    
9import logging # TODO get rid of these print statements!
                    
10from bs4 import BeautifulSoup
                    
11
                    
173        return
                    
174    csrfHtml = BeautifulSoup(r.text, 'html.parser').find(id = 'csrf-token')
                    
175    if csrfHtml:
                    
                
diagnose.py https://gitlab.com/Rheinhart/csuchen-Guard | Python | 216 lines
                    
8import bs4
                    
9from bs4 import BeautifulSoup, __version__
                    
10from bs4.builder import builder_registry
                    
69        try:
                    
70            soup = BeautifulSoup(data, parser)
                    
71            success = True
                    
178            a = time.time()
                    
179            soup = BeautifulSoup(data, parser)
                    
180            b = time.time()
                    
207    vars = dict(bs4=bs4, data=data, parser=parser)
                    
208    cProfile.runctx('bs4.BeautifulSoup(data, parser)' , vars, vars, filename)
                    
209
                    
                
quiztestidevice.py https://github.com/RoDaniel/featurehouse.git | Python | 306 lines
                    
217        """
                    
218        takes a BeautifulSoup fragment (i) and bursts its contents to 
                    
219        import this idevice from a CommonCartridge export
                    
                
default.py http://seppius-xbmc-repo.googlecode.com/svn/trunk/ | Python | 406 lines
                    
32
                    
33from BeautifulSoup import BeautifulSoup, BeautifulStoneSoup
                    
34import socket
                    
108
                    
109        beautifulSoup = BeautifulSoup(http)
                    
110        userPanel = beautifulSoup.find('a', {"id": "loginlink"})
                    
171
                    
172    beautifulSoup = BeautifulSoup(http)
                    
173    content = beautifulSoup.find('div', attrs={'id': 'dle-content'})
                    
243
                    
244    beautifulSoup = BeautifulSoup(http)
                    
245    categoryContainer = beautifulSoup.find('ul', 'cats')
                    
273
                    
274    beautifulSoup = BeautifulSoup(http)
                    
275    tagsContainer = beautifulSoup.find('td', 'news')
                    
                
jobs.py git://pkgs.fedoraproject.org/sugar-read | Python | 310 lines
                    
26import os.path
                    
27import BeautifulSoup
                    
28
                    
71    def _searchfile(self, fileobj):
                    
72        soup = BeautifulSoup.BeautifulSoup(fileobj)
                    
73        body = soup.find('body')
                    
                
test_converter_unittest.py https://gitlab.com/x33n/phantomjs | Python | 319 lines
                    
34from webkitpy.common.system.outputcapture import OutputCapture
                    
35from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup
                    
36from webkitpy.w3c.test_converter import W3CTestConverter
                    
185
                    
186        doc = BeautifulSoup(test_html)
                    
187        oc = OutputCapture()
                    
266        try:
                    
267            converted = converter.convert_prefixed_properties(BeautifulSoup(test_content[1]), DUMMY_FILENAME)
                    
268        finally:
                    
281        if isinstance(converted, basestring):
                    
282            converted = BeautifulSoup(converted)
                    
283
                    
                
mastodon.scm https://gitlab.com/daym/guix | Scheme | 174 lines
                    
60    (inputs
                    
61     `(("python-beautifulsoup4" ,python-beautifulsoup4)
                    
62       ("python-requests" ,python-requests)
                    
                
browser.py https://bitbucket.org/synl0rd/upt_tik_itenas.git | Python | 236 lines
                    
87        import BeautifulSoup
                    
88        return BeautifulSoup.BeautifulSoup(self.data)
                    
89
                    
                
util.py https://github.com/sunlightlabs/muni_words.git | Python | 198 lines
                    
8from django.contrib.gis.geos import Point
                    
9from BeautifulSoup import BeautifulSoup, BeautifulStoneSoup
                    
10from excludes import EXCLUDED
                    
40def strip_html(string):
                    
41    return ''.join([e for e in BeautifulSoup(string).recursiveChildGenerator() if isinstance(e, unicode)]).replace(' ', ' ')
                    
42
                    
                
lec_04_scraping.ipynb https://gitlab.com/xbsd/content | Jupyter | 379 lines
                    
23      "from pattern import web\n",
                    
24      "from BeautifulSoup import BeautifulSoup"
                    
25     ],
                    
261     "source": [
                    
262      "# Using BeautifulSoup"
                    
263     ]
                    
268     "input": [
                    
269      "bs = BeautifulSoup(r.text)\n",
                    
270      "for movie in bs.findAll('td', 'title'):\n",
                    
                
get_legislation.py https://github.com/chrismetcalf/fiftystates.git | Python | 112 lines
                    
4import datetime as dt
                    
5from BeautifulSoup import BeautifulSoup
                    
6
                    
18
                    
19        # Get the details page and parse it with BeautifulSoup. These
                    
20        # pages contain a malformed 'p' tag that (certain versions of)
                    
23        details_raw = details_raw.replace('<P ALIGN=CENTER">', '')
                    
24        details = BeautifulSoup(details_raw)
                    
25
                    
26        # Get the history page (following a link from the details page).
                    
27        # Once again, we remove tags that BeautifulSoup chokes on
                    
28        # (including all meta tags, because bills with quotation marks
                    
35        history_raw = rem_meta.sub('</title></head>', history_raw)
                    
36        history = BeautifulSoup(history_raw)
                    
37
                    
                
examples.py https://github.com/towerjoo/django-test-extensions.git | Python | 112 lines
                    
103    def test_using_beautiful_soup(self):
                    
104        "Example test for content on a given view, this time using the BeautifulSoup parser"
                    
105        response = self.client.get('/example/')
                    
105        response = self.client.get('/example/')
                    
106        soup = BeautifulSoup(response.content)
                    
107        self.assert_equal("Page Title", soup.find("title").string.strip())
                    
                
lxml.html.ElementSoup-module.html https://github.com/jcrobak/hue.git | HTML | 278 lines
                    
64<h1 class="epydoc">Module ElementSoup</h1><p class="nomargin-top"><span class="codelink"><a href="lxml.html.ElementSoup-pysrc.html">source&nbsp;code</a></span></p>
                    
65Legacy interface to the BeautifulSoup HTML parser.
                    
66
                    
91        <span class="summary-sig-arg">makeelement</span>=<span class="summary-sig-default">None</span>)</span><br />
                    
92      Convert a BeautifulSoup tree to a list of Element trees.</td>
                    
93          <td align="right" valign="top">
                    
108          <td><span class="summary-sig"><a name="parse"></a><span class="summary-sig-name">parse</span>(<span class="summary-sig-arg">file</span>,
                    
109        <span class="summary-sig-arg">beautifulsoup</span>=<span class="summary-sig-default">None</span>,
                    
110        <span class="summary-sig-arg">makeelement</span>=<span class="summary-sig-default">None</span>)</span></td>
                    
141    </td><td class="summary">
                    
142        <a href="lxml.html.ElementSoup-module.html#__doc__" class="summary-name" onclick="show_private();">__doc__</a> = <code title="&quot;&quot;&quot;Legacy interface to the BeautifulSoup HTML parser.
                    
143&quot;&quot;&quot;">&quot;&quot;&quot;Legacy interface to the BeautifulSoup HTML pars<code class="variable-ellipsis">...</code></code>
                    
179  
                    
180  <p>Convert a BeautifulSoup tree to a list of Element trees.</p>
                    
181<p>Returns a list instead of a single root Element to support
                    
                
bills.py https://github.com/runderwood/openstates.git | Python | 293 lines
                    
3
                    
4from BeautifulSoup import BeautifulSoup
                    
5
                    
33            with self.urlopen(index_file) as doc:
                    
34                soup = BeautifulSoup(cleanup_html(doc))
                    
35
                    
123        with self.urlopen(url) as doc:
                    
124            soup = BeautifulSoup(doc)
                    
125            date=None
                    
                
Makefile https://gitlab.com/lokiexinferis/vim-configs | Makefile | 82 lines
                    
78	virtualenv build/html2vimdoc
                    
79	build/html2vimdoc/bin/pip install beautifulsoup coloredlogs==4.0 markdown
                    
80
                    
                
tracker.py https://gitlab.com/jan.raddatz/myimmitracker-analyzer | Python | 163 lines
                    
3#pip install --proxy proxy:8080 bs4
                    
4from bs4 import BeautifulSoup
                    
5# pip install --proxy proxy:8080 ansicolors
                    
80#	r = requests.get(hostname + url_to_scrape, proxies=proxyDict)
                    
81	soup = BeautifulSoup(r.text, 'html.parser')
                    
82	all_tables = soup.find_all('table')
                    
                
module-tree.html https://github.com/jcrobak/hue.git | HTML | 170 lines
                    
73    <ul>
                    
74    <li> <strong class="uidlink"><a href="lxml.html.ElementSoup-module.html">lxml.html.ElementSoup</a></strong>: <em class="summary">Legacy interface to the BeautifulSoup HTML parser.</em>    </li>
                    
75    <li class="private"> <strong class="uidlink">lxml.html._dictmixin</strong>    </li>
                    
82    <li> <strong class="uidlink"><a href="lxml.html.html5parser-module.html">lxml.html.html5parser</a></strong>: <em class="summary">An interface to html5lib.</em>    </li>
                    
83    <li> <strong class="uidlink"><a href="lxml.html.soupparser-module.html">lxml.html.soupparser</a></strong>: <em class="summary">External interface to the BeautifulSoup HTML parser.</em>    </li>
                    
84    <li> <strong class="uidlink"><a href="lxml.html.usedoctest-module.html">lxml.html.usedoctest</a></strong>: <em class="summary">Doctest module for HTML comparison.</em>    </li>
                    
                
Makefile https://github.com/freebsd/freebsd-ports.git | Makefile | 90 lines
                    
49EXCEL_DESC=	MS Excel I/O Add-ons
                    
50HTML5LIB_DESC=	Parse HTML with www/py-html5lib and www/py-beautifulsoup
                    
51HTML_DESC=	HTML Parsing/Generation Add-ons
                    
52JINJA2_DESC=	Support conditional HTML formatting with devel/py-Jinja2
                    
53LXML_DESC=	Parse HTML with devel/py-lxml and www/py-beautifulsoup
                    
54MPL_DESC=	Support graphical plotting output via math/py-matplotlib
                    
72BTLNCK_RUN_DEPENDS=	${PYTHON_PKGNAMEPREFIX}bottleneck>=1.2.0:math/py-bottleneck@${PY_FLAVOR}
                    
73HTML5LIB_RUN_DEPENDS=	${PYTHON_PKGNAMEPREFIX}beautifulsoup>=4.2.1:www/py-beautifulsoup@${PY_FLAVOR} \
                    
74			${PYTHON_PKGNAMEPREFIX}html5lib>0:www/py-html5lib@${PY_FLAVOR}
                    
75JINJA2_RUN_DEPENDS=	${PYTHON_PKGNAMEPREFIX}Jinja2>0:devel/py-Jinja2@${PY_FLAVOR}
                    
76LXML_RUN_DEPENDS=	${PYTHON_PKGNAMEPREFIX}beautifulsoup>=4.2.1:www/py-beautifulsoup@${PY_FLAVOR} \
                    
77			${PYTHON_PKGNAMEPREFIX}lxml>0:devel/py-lxml@${PY_FLAVOR}
                    
                
Parser.py https://gitlab.com/Fremis/IRCourse | Python | 276 lines
                    
7
                    
8from bs4 import BeautifulSoup, NavigableString
                    
9import gc
                    
74            try:
                    
75                soup = BeautifulSoup(open_file, 'lxml')
                    
76            except UnicodeDecodeError:
                    
                
testFunctional.py https://github.com/bogtan/Naaya.git | Python | 221 lines
                    
22from copy import deepcopy
                    
23from BeautifulSoup import BeautifulSoup
                    
24
                    
153        html = self.browser.get_html()
                    
154        soup = BeautifulSoup(html)
                    
155
                    
                
primewire.py https://bitbucket.org/Leia18/gmc.git | Python | 207 lines
                    
5
                    
6from BeautifulSoup import BeautifulSoup
                    
7from universalscrapers import proxy
                    
28        try:
                    
29            html = BeautifulSoup(self.get_html(title, self.moviesearch_link))
                    
30            index_items = html.findAll('div', attrs={'class': 'index_item index_item_ie'})
                    
68        try:
                    
69            html = BeautifulSoup(self.get_html(title, self.tvsearch_link))
                    
70            index_items = html.findAll('div', attrs={'class': re.compile('index_item.+?')})
                    
89                            show_url = urlparse.urljoin(self.base_link, href)
                    
90                            html = BeautifulSoup(proxy.get(show_url, 'tv_episode_item'))
                    
91
                    
123        html = proxy.get(url, 'searchform')
                    
124        parsed_html = BeautifulSoup(html)
                    
125        key = parsed_html.findAll('input', attrs={'name': 'key'})[0]["value"]
                    
                
citotron.py https://gitlab.com/maxigas/citotron.git | Python | 265 lines
                    
8from args import args
                    
9from bs4 import BeautifulSoup as bs
                    
10from collections import Counter as counter
                    
                
HelpIndex.py https://github.com/esitarski/CrossMgr.git | Python | 98 lines
                    
9import re
                    
10from bs4 import BeautifulSoup
                    
11
                    
45	for f in glob.iglob( os.path.join(htmlDocDir, '*.html') ):
                    
46		doc = BeautifulSoup( open(f).read(), 'html.parser' )
                    
47		div = doc.find('div', class_='content')
                    
                
__init__.py https://github.com/theduke/sehistory.git | Python | 238 lines
                    
2
                    
3from libraries.BeautifulSoup import BeautifulSoup
                    
4
                    
152    def extractLogo(self, html):
                    
153        soup = BeautifulSoup(html)
                    
154    
                    
                
_html5lib.py https://gitlab.com/eientei95/crunchy-xml-decoder | Python | 221 lines
                    
82    def fragmentClass(self):
                    
83        self.soup = BeautifulSoup("")
                    
84        self.soup.name = "[document_fragment]"
                    
                
filter.py https://gitlab.com/cobhuni/hadith_alislam_extractor | Python | 197 lines
                    
31from argparse import ArgumentParser
                    
32from bs4 import BeautifulSoup
                    
33import multiprocessing as mp
                    
68    Args:
                    
69        t (class 'bs4.BeautifulSoup'): html to parse
                    
70        fn (str): filename, to trace errors.
                    
72    Returns:
                    
73        class 'bs4.BeautifulSoup': cell containing the text
                    
74    
                    
117    with open(os.path.join(args.input_dir, fname)) as inf:
                    
118        soup = BeautifulSoup(inf.read(),'lxml')
                    
119
                    
                
requirements_txt_linker_spec.rb https://gitlab.com/YarNayar/gitlab-ce | Ruby | 95 lines
                    
28        nose-cov
                    
29        beautifulsoup4
                    
30        #
                    
70      expect(subject).to include(link('nose-cov', 'https://pypi.python.org/pypi/nose-cov'))
                    
71      expect(subject).to include(link('beautifulsoup4', 'https://pypi.python.org/pypi/beautifulsoup4'))
                    
72      expect(subject).to include(link('docopt', 'https://pypi.python.org/pypi/docopt'))
                    
                
imo.py https://gitlab.com/rithvikvibhu/batch-sof | Python | 71 lines
                    
3import pprint
                    
4from bs4 import BeautifulSoup
                    
5
                    
47	
                    
48	soup = BeautifulSoup(r.text, "html5lib")                                                            # Soup up html
                    
49	table_data = [[cell.text for cell in row("td")]
                    
49	table_data = [[cell.text for cell in row("td")]
                    
50		for row in BeautifulSoup(r.text, "html5lib")("tr")]
                    
51		
                    
                
prototype_to_cix.py https://gitlab.com/Smileyt/KomodoEdit | Python | 295 lines
                    
48Requirements:
                    
49  * BeautifulSoup   (http://www.crummy.com/software/BeautifulSoup/)
                    
50  * cElementTree    (http://effbot.org/downloads/#cElementTree)
                    
65
                    
66from BeautifulSoup import BeautifulSoup, NavigableString
                    
67
                    
260    data = getPrototypeDocsFromWebpage()
                    
261    soup = BeautifulSoup(data)
                    
262    cix_root = createCixRoot(name="Prototype", description="JavaScript framework for web development")
                    
                
settings.py https://github.com/knabar/fynbos.git | Python | 64 lines
                    
50# the backend to use when parsing the JavaScript or Stylesheet files
                    
51PARSER = getattr(settings, 'COMPRESS_PARSER', 'compressor.parser.BeautifulSoupParser')
                    
52
                    
                
test_archives.py https://gitlab.com/Acidburn0zzz/hyperkitty | Python | 341 lines
                    
33from mock import Mock
                    
34from bs4 import BeautifulSoup
                    
35from django.contrib.auth.models import User
                    
317        """
                    
318        soup = BeautifulSoup(html, "html.parser")
                    
319        months_list = soup.find(id="months-list")
                    
                
toc.py https://gitlab.com/janninematt/janninematt | Python | 145 lines
                    
12
                    
13from bs4 import BeautifulSoup, Comment
                    
14
                    
119    tree = node = HtmlTreeNode(None, title, 'h0', '')
                    
120    soup = BeautifulSoup(content._content, 'html.parser')
                    
121    settoc = False
                    
137        tree_string = '{}'.format(tree)
                    
138        tree_soup = BeautifulSoup(tree_string, 'html.parser')
                    
139        content.toc = tree_soup.decode(formatter='html')
                    
                
get_legislation.py https://github.com/BRIMIL01/fiftystates.git | Python | 221 lines
                    
15    soup_parser = html5lib.HTMLParser(
                    
16        tree=html5lib.treebuilders.getTreeBuilder('beautifulsoup')).parse
                    
17
                    
                
__init__.py https://bitbucket.org/rattray/popcorn-portal.git | Python | 355 lines
                    
3"The Screen-Scraper's Friend"
                    
4http://www.crummy.com/software/BeautifulSoup/
                    
5
                    
15documentation:
                    
16http://www.crummy.com/software/BeautifulSoup/bs4/doc/
                    
17"""
                    
23
                    
24__all__ = ['BeautifulSoup']
                    
25
                    
48
                    
49class BeautifulSoup(Tag):
                    
50    """
                    
91                "BS4 does not respect the convertEntities argument to the "
                    
92                "BeautifulSoup constructor. Entities are always converted "
                    
93                "to Unicode characters.")
                    
                
build.py https://gitlab.com/imbest91/grapejuice | Python | 537 lines
                    
16import yaml
                    
17from bs4 import BeautifulSoup
                    
18from jinja2 import Environment, FileSystemLoader, select_autoescape
                    
261
                    
262        md_soup = BeautifulSoup(rendered_markdown, "lxml")
                    
263        summarizer = Summarizer(break_pads=["[summary-snip]"])
                    
357
                    
358    soup = BeautifulSoup(content, "html5lib")
                    
359
                    
                
default.py https://gitlab.com/billyprice1/husham.com | Python | 280 lines
                    
10import requests
                    
11from BeautifulSoup import BeautifulSoup as bs
                    
12from utils.webutils import *
                    
                
main.py https://gitlab.com/smidaharoun/devoirTunisiePython | Python | 197 lines
                    
3
                    
4from bs4 import BeautifulSoup
                    
5from flask import Flask, jsonify
                    
13page = urllib2.urlopen(main)
                    
14soup = BeautifulSoup(page, 'html.parser')
                    
15soup.prettify()
                    
68    page_level = urllib2.urlopen(url_level)
                    
69    soup_level = BeautifulSoup(page_level, 'html.parser')
                    
70    soup_level.prettify()
                    
100    page_level = urllib2.urlopen(url_level)
                    
101    soup_level = BeautifulSoup(page_level, 'html.parser')
                    
102    soup_level.prettify()
                    
132    page_level = urllib2.urlopen(url_level)
                    
133    soup_level = BeautifulSoup(page_level, 'html.parser')
                    
134    soup_level.prettify()
                    
                
editor.py https://gitlab.com/dannywillems/geeknote | Python | 259 lines
                    
5import tempfile
                    
6from bs4 import BeautifulSoup, NavigableString
                    
7import threading
                    
56
                    
57        # soup.select cant be used with dashes: https://bugs.launchpad.net/beautifulsoup/+bug/1276211
                    
58        for todo in soup.find_all('en-todo'):
                    
78    def ENMLtoText(contentENML):
                    
79        soup = BeautifulSoup(contentENML.decode('utf-8'))
                    
80
                    
115        '''
                    
116        Transforms github style checklists `* [ ]` in the BeautifulSoup tree to
                    
117        enml.
                    
172
                    
173              soup = BeautifulSoup(contentHTML, 'html.parser')
                    
174              Editor.checklistInSoupToENML(soup)
                    
                
testFunctional.py https://github.com/eaudeweb/Naaya.git | Python | 151 lines
                    
1import re
                    
2from BeautifulSoup import BeautifulSoup
                    
3
                    
141        html = self.browser.get_html()
                    
142        soup = BeautifulSoup(html)
                    
143
                    
                
build.py https://code.google.com/p/python-for-android/ | Python | 204 lines
                    
105print 'Installing BeautifulSoup.'
                    
106beautifulsoup_path = os.path.join(pwd, 'python-libs','BeautifulSoup')
                    
107compileall.compile_dir(beautifulsoup_path)
                    
107compileall.compile_dir(beautifulsoup_path)
                    
108shutil.copy(os.path.join(beautifulsoup_path, 'BeautifulSoup.pyc'),
                    
109            'output/usr/lib/python2.6/BeautifulSoup.pyc')
                    
                
README.rst https://github.com/liberation/django_compressor.git | ReStructuredText | 71 lines
                    
36is done using lxml_ or if it's not available Python's built-in HTMLParser by
                    
37default. As an alternative Django Compressor provides a BeautifulSoup_ and a
                    
38html5lib_ based parser, as well as an abstract base class that makes it easy to
                    
58
                    
59.. _BeautifulSoup: http://www.crummy.com/software/BeautifulSoup/
                    
60.. _lxml: http://lxml.de/
                    
                
PROJECT_IDEAS.md https://gitlab.com/ini-python-lab-course/ss16 | Markdown | 126 lines
                    
49* [import.io](https://import.io/): Service that extracts data from websites
                    
50* [BeautifulSoup](http://www.crummy.com/software/BeautifulSoup/): Convenient access to content of a downloaded website
                    
51* [Scrapy](http://scrapy.org/): Framework for scraping websites
                    
                
single.py https://gitlab.com/skororu/pysnippets | Python | 67 lines
                    
8
                    
9import bs4                # BeautifulSoup
                    
10import requests           # codes.ok, get
                    
29        req = requests.get(url)
                    
30        page = bs4.BeautifulSoup(req.text, 'lxml')
                    
31
                    
45    with requests.get(base_url) as req:
                    
46        page = bs4.BeautifulSoup(req.text, 'lxml')
                    
47        num_previous = page.find('a', rel='prev')['href']
                    
                
independent.py https://gitlab.com/harrigan/TPP | Python | 36 lines
                    
5from crimespider.items import CrimeItem
                    
6from bs4 import BeautifulSoup
                    
7
                    
30            article += c.extract()
                    
31        s = BeautifulSoup(article, 'lxml')
                    
32        print( s.get_text() )
                    
                
urls.html https://github.com/msparks/pyhole.git | HTML | 155 lines
                    
64
                    
65<span class="kn">from</span> <span class="nn">BeautifulSoup</span> <span class="kn">import</span> <span class="n">BeautifulSoup</span>
                    
66
                    
105
                    
106        <span class="n">soup</span> <span class="o">=</span> <span class="n">BeautifulSoup</span><span class="p">(</span><span class="n">response</span><span class="o">.</span><span class="n">read</span><span class="p">())</span>
                    
107
                    
                
urlnorm.py https://github.com/e1ven/Lonava.git | Python | 256 lines
                    
28import urllib2
                    
29from BeautifulSoup import BeautifulSoup
                    
30import socket
                    
69        try:
                    
70            soup = BeautifulSoup(html)
                    
71            links = soup.findAll('link')
                    
                
geoserver.py https://github.com/dotskapes/dotSkapes.git | Python | 104 lines
                    
2from urllib2 import urlopen
                    
3from BeautifulSoup import BeautifulStoneSoup
                    
4
                    
                
version_check.py https://gitlab.com/mimizone/kolla | Python | 126 lines
                    
19
                    
20from bs4 import BeautifulSoup as bs
                    
21from oslo_config import cfg
                    
                
readability.py https://gitlab.com/zouxc/cola | Python | 368 lines
                    
31except ImportError:
                    
32    raise DependencyNotInstalledError("BeautifulSoup4")
                    
33
                    
                
bugzilla_unittest.py https://github.com/weissms/owb-mirror.git | Python | 296 lines
                    
34
                    
35from modules.BeautifulSoup import BeautifulSoup
                    
36
                    
204        bugzilla = Bugzilla()
                    
205        soup = BeautifulSoup(self._example_attachment)
                    
206        attachment_element = soup.find("attachment")
                    
                
README.rst https://gitlab.com/gallaecio/chakraversiontracker | ReStructuredText | 245 lines
                    
18
                    
19- `beautifulsoup4 <https://www.crummy.com/software/BeautifulSoup/bs4/doc/>`_
                    
20
                    
                
helper.py https://github.com/macdylan/LBForum.git | Python | 44 lines
                    
2# -*- coding: UTF-8 -*-
                    
3from BeautifulSoup import BeautifulSoup, NavigableString
                    
4from django.conf import settings
                    
31def clean_html( fragment ):
                    
32    soup = BeautifulSoup( fragment.strip() )
                    
33    def cleanup( soup ):
                    
                
generate_featured_pages.py https://github.com/pcdinh/trendingtopics.git | Python | 153 lines
                    
13import urllib2
                    
14from BeautifulSoup import BeautifulSoup
                    
15import datetime
                    
65  page = opener.open( url ).read()
                    
66  soup = BeautifulSoup(page)
                    
67  return soup
                    
                
faq.rst https://gitlab.com/oytunistrator/scrapy | ReStructuredText | 286 lines
                    
5
                    
6How does Scrapy compare to BeautifulSoup or lxml?
                    
7-------------------------------------------------
                    
8
                    
9`BeautifulSoup`_ and `lxml`_ are libraries for parsing HTML and XML. Scrapy is
                    
10an application framework for writing web spiders that crawl web sites and
                    
13Scrapy provides a built-in mechanism for extracting data (called
                    
14:ref:`selectors <topics-selectors>`) but you can easily use `BeautifulSoup`_
                    
15(or `lxml`_) instead, if you feel more comfortable working with them. After
                    
18
                    
19In other words, comparing `BeautifulSoup`_ (or `lxml`_) to Scrapy is like
                    
20comparing `jinja2`_ to `Django`_.
                    
21
                    
22.. _BeautifulSoup: http://www.crummy.com/software/BeautifulSoup/
                    
23.. _lxml: http://lxml.de/
                    
                
ba.py https://github.com/barttenbrinke/Bartsidee-Repository.git | Python | 328 lines
                    
13import cPickle as pickle
                    
14from beautifulsoup.BeautifulSoup import BeautifulSoup
                    
15
                    
125    data = FetchUrl(samiurl, 0)
                    
126    soup = BeautifulSoup(data, convertEntities="xml", smartQuotesTo="xml")
                    
127    i = 1
                    
164    data = FetchUrl(path)
                    
165    soup = BeautifulSoup(data, convertEntities="xml", smartQuotesTo="xml")
                    
166    i = 1
                    
                
soup.py https://github.com/scottjasta/Places.git | Python | 228 lines
                    
2
                    
3warnings.warn("BeautifulSoup 3.x (as of 3.1) is not fully compatible with html5lib and support will be removed in the future", DeprecationWarning)
                    
4
                    
4
                    
5from BeautifulSoup import BeautifulSoup, Tag, NavigableString, Comment, Declaration
                    
6
                    
140        if namespaceHTMLElements:
                    
141            warnings.warn("BeautifulSoup cannot represent elements in any namespace", DataLossWarning)
                    
142        _base.TreeBuilder.__init__(self, namespaceHTMLElements)
                    
144    def documentClass(self):
                    
145        self.soup = BeautifulSoup("")
                    
146        return Element(self.soup, self.soup, None)
                    
162        if namespace is not None:
                    
163            warnings.warn("BeautifulSoup cannot represent elements in any namespace", DataLossWarning)
                    
164        return Element(Tag(self.soup, name), self.soup, namespace)
                    
                
ultimate.py https://gitlab.com/eientei95/crunchy-xml-decoder | Python | 351 lines
                    
18import altfuncs
                    
19from bs4 import BeautifulSoup
                    
20from crunchyDec import CrunchyDec
                    
231    media_id = page_url[-6:]
                    
232    xmlconfig = BeautifulSoup(altfuncs.getxml('RpcApiVideoPlayer_GetStandardConfig', media_id), 'xml')
                    
233
                    
249        media_id = xmlconfig.find('media_id').string
                    
250        xmlconfig = BeautifulSoup(altfuncs.getxml('RpcApiVideoEncode_GetStreamInfo', media_id), 'xml')
                    
251        host = xmlconfig.find('host').string
                    
                
models.py https://github.com/agiliq/Dinette.git | Python | 375 lines
                    
10import hashlib
                    
11from BeautifulSoup import BeautifulSoup
                    
12import datetime
                    
260    def htmlfrombbcode(self):
                    
261        soup = BeautifulSoup(self.message.raw)
                    
262        #remove all html tags from the message
                    
                
adapter_twcslibrarynet.py https://code.google.com/p/fanficdownloader/ | Python | 273 lines
                    
25
                    
26from .. import BeautifulSoup as bs
                    
27from ..htmlcleanup import stripHTML
                    
136        
                    
137        # use BeautifulSoup HTML parser to make everything easier to find.
                    
138        soup = bs.BeautifulSoup(data)
                    
227
                    
228            # use BeautifulSoup HTML parser to make everything easier to find.
                    
229            seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url))
                    
252
                    
253        chapter=bs.BeautifulSoup('<div class="story"></div>')
                    
254        
                    
254        
                    
255        soup = bs.BeautifulSoup(data)
                    
256
                    
                
yes24_script.py https://github.com/jangxyz/yes24.git | Python | 193 lines
                    
4import urllib, urllib2, cookielib
                    
5from BeautifulSoup import BeautifulSoup
                    
6from datetime import datetime
                    
77    # parse
                    
78    soup = BeautifulSoup(text)
                    
79    order_list_table     = soup.table(id="MyOrderListTbl")[0]
                    
125    # parse
                    
126    soup = BeautifulSoup(text)
                    
127    order_price = soup.find(id="CLbTotOrdAmt").b.string
                    
129    text = '<table>' + text[text[1:].find('<')+1:-7] + '</table>'
                    
130    soup = BeautifulSoup(text)
                    
131    point_saved = soup.find(attrs={'class':"price"}).b.string
                    
                
zad_7.py https://gitlab.com/mmeisel/LV | Python | 43 lines
                    
8import urllib
                    
9from bs4 import BeautifulSoup
                    
10
                    
26html=urllib.urlopen(urlAddr, "lxml").read() #otvara se url
                    
27soup=BeautifulSoup(html)                    #i deklarira objekt tipa BeautifulSoup
                    
28
                    
                
_lxml.py https://bitbucket.org/bendikro/deluge-yarss-plugin.git | Python | 296 lines
                    
60    def initialize_soup(self, soup):
                    
61        """Let the BeautifulSoup object know about the standard namespace
                    
62        mapping.
                    
67    def _register_namespaces(self, mapping):
                    
68        """Let the BeautifulSoup object know about namespaces encountered
                    
69        while parsing the document.
                    
74            if key and key not in self.soup._namespaces:
                    
75                # Let the BeautifulSoup object know about a new namespace.
                    
76                # If there are multiple namespaces defined with the same
                    
189
                    
190            # First, Let the BeautifulSoup object know about it.
                    
191            self._register_namespaces(nsmap)
                    
                
nrlbot.py https://gitlab.com/g.davis13/nrlbot | Python | 344 lines
                    
35
                    
36from bs4 import BeautifulSoup
                    
37from collections import namedtuple
                    
100    r = requests.get(url)
                    
101    soup = BeautifulSoup(r.text, 'html.parser')
                    
102    return soup
                    
                
__init__.py https://bitbucket.org/yourcelf/old-intertwinkles.git | Python | 163 lines
                    
19from django_browserid import get_audience
                    
20from bs4 import BeautifulSoup
                    
21
                    
93        self.assertTrue("test@example.com" in res.content)
                    
94        soup = BeautifulSoup(res.content)
                    
95        self.assertEquals(soup.find(id="id_email").get("value"),
                    
104        self.assertFalse("This address is unconfirmed" in res.content)
                    
105        soup = BeautifulSoup(res.content)
                    
106        self.assertEquals(soup.find(id="id_email").get("value"),
                    
                
conversation.py https://gitlab.com/sanchezfauste/TweetDigraph | Python | 108 lines
                    
1from bs4 import BeautifulSoup
                    
2import requests
                    
95		if req.status_code == 200:
                    
96			html = BeautifulSoup(req.text, 'html.parser')
                    
97			conversations = html.find_all('li', {'class':'ThreadedConversation'})
                    
                
base_fetcher.py https://bitbucket.org/filmaster/filmaster-stable/ | Python | 193 lines
                    
3
                    
4from beautifulsoup import BeautifulSoup
                    
5from optparse import make_option
                    
19
                    
20BeautifulSoup.MARKUP_MASSAGE += [
                    
21    (re.compile(r"<[^>]+>"), lambda tag:quote_re.sub(r"\1 \2", tag.group(0))),
                    
65    def soup(self, data):
                    
66        return BeautifulSoup(data)
                    
67
                    
                
scrape.py https://github.com/sneeu/aliss_scrapers.git | Python | 113 lines
                    
7
                    
8from BeautifulSoup import BeautifulSoup
                    
9
                    
32    html = re.sub('<script.*?>[\s\S]*?</.*?script>', '', html)
                    
33    soup = BeautifulSoup(html)
                    
34
                    
                
index.html https://github.com/larsks/blog.oddbit.com.git | HTML | 240 lines
                    
6        <title>Recent answers on StackOverflow &middot; The Odd Bit</title>
                    
7        <meta name="description" content="Traefik different ports for different Docker containers docker docker-compose traefik  git push can not find -o option git  Interact with podman docker via socket in Redhat 9 docker redhat podman  Capturing commented text in an XML python xml beautifulsoup xml-comments  How to execute a shell script as input on an interactive bash pod in Kubernetes? bash shell kubernetes  Docker : Opensearch refuses connection with the example in opensearch documentation in docker python-3.">
                    
8        <meta name="HandheldFriendly" content="True">
                    
142<li><p><a class="sx-answer sx-answer-accepted" href="https://stackoverflow.com/questions/72681436/capturing-commented-text-in-an-xml/72681822#72681822">Capturing commented text in an XML</a>
                    
143  <a class="sx-tag" href="https://stackoverflow.com/questions/tagged/python">python</a> <a class="sx-tag" href="https://stackoverflow.com/questions/tagged/xml">xml</a> <a class="sx-tag" href="https://stackoverflow.com/questions/tagged/beautifulsoup">beautifulsoup</a> <a class="sx-tag" href="https://stackoverflow.com/questions/tagged/xml-comments">xml-comments</a> </p></li>
                    
144
                    
                
bills.py https://github.com/runderwood/openstates.git | Python | 138 lines
                    
5import urllib2
                    
6from BeautifulSoup import BeautifulSoup
                    
7
                    
12
                    
13It includes a spurious </HEAD> before the useful data begins and lines like '<option value="Bill"selected="selected">Bill</option>', in which the lack of a space between the attributes confuses BeautifulSoup.
                    
14'''
                    
69                    return False
                    
70                soup = BeautifulSoup(cleansource(data))
                    
71                rows = soup.findAll('table')[1].findAll('tr')[1:]
                    
91        with self.urlopen(histurl) as data:
                    
92            soup = BeautifulSoup(cleansource(data))
                    
93            basicinfo = soup.findAll('div', id='bhistleft')[0]
                    
                
lequipe_fr.py https://gitlab.com/edelans/scoragora | Python | 180 lines
                    
55	html = requests.get(url).text
                    
56	soup = BeautifulSoup.BeautifulSoup(html)
                    
57	# Get date
                    
85	html = requests.get(day_url).text
                    
86	soup = BeautifulSoup.BeautifulSoup(html)
                    
87	result = []
                    
119		html = requests.get(url).text
                    
120		soup = BeautifulSoup.BeautifulSoup(html)
                    
121		root = soup.find(attrs={'name': 'IDNIVEAU'})
                    
147		html = requests.get(url).text
                    
148		soup = BeautifulSoup.BeautifulSoup(html)
                    
149		soup.find(id="timeline")
                    
169		html = requests.get(url).text
                    
170		soup = BeautifulSoup.BeautifulSoup(html)
                    
171		hometeam_score = soup.find(id='scoDom')
                    
                
tpb.py https://github.com/SpLord/CouchPotato.git | Python | 160 lines
                    
4from dateutil.parser import parse
                    
5from imdb.parser.http.bsouplxml._bsoup import SoupStrainer, BeautifulSoup
                    
6from urllib import quote_plus
                    
63            tables = SoupStrainer('table')
                    
64            html = BeautifulSoup(data, parseOnlyThese = tables)
                    
65            resultTable = html.find('table', attrs = {'id':'searchResult'})
                    
154        div = SoupStrainer('div')
                    
155        html = BeautifulSoup(data, parseOnlyThese = div)
                    
156        html = html.find('div', attrs = {'class':'nfo'})
                    
                
searchengine.py https://github.com/kzfm1024/misc.git | Python | 306 lines
                    
1import urllib2
                    
2from BeautifulSoup import *
                    
3from urlparse import urljoin
                    
106        try:
                    
107          soup=BeautifulSoup(c.read())
                    
108          self.addtoindex(page,soup)
                    
                
__init__.py https://bitbucket.org/katey_hack/kindle-touch-l10n.git | Python | 96 lines
                    
40    treeType - the name of the tree type required (case-insensitive). Supported
                    
41               values are "simpletree", "dom", "etree" and "beautifulsoup"
                    
42               
                    
51                          ElementTree, cElementTree and lxml.etree).
                    
52                "beautifulsoup" - Beautiful soup (if installed)
                    
53               
                    
70            treeBuilderCache[treeType] = simpletree.TreeBuilder
                    
71        elif treeType == "beautifulsoup":
                    
72            import soup
                    
                
KitMensaService.py https://gitlab.com/namboy94/messengerbot | Python | 230 lines
                    
26import requests
                    
27from bs4 import BeautifulSoup
                    
28from typing import Tuple
                    
162            html = requests.get(url).text
                    
163            soup = BeautifulSoup(html, "html.parser")
                    
164            resource = soup.select('body')
                    
                
2010-11-21-exploring_art_data_3.md https://gitlab.com/rheaplex/robmyers.org | Markdown | 172 lines
                    
23    <tt>#!/usr/bin/python
                    
24    from BeautifulSoup import BeautifulStoneSoup
                    
25    import re
                    
                
test_markdown_to_html.py https://gitlab.com/Ivy001/pants | Python | 186 lines
                    
173
                    
174      soup = bs4.BeautifulSoup(markup=html)
                    
175      self.assertIsNotNone(soup.find(text='A good link:'))
                    
                
utils.py https://github.com/Gautier/django-page-cms.git | Python | 139 lines
                    
119        return content
                    
120    from BeautifulSoup import BeautifulSoup
                    
121    tree = BeautifulSoup(content)
                    
                
util.py https://gitlab.com/Lett1/SlackDuckBot | Python | 151 lines
                    
5import re
                    
6from bs4 import BeautifulSoup
                    
7from urllib.request import Request, urlopen
                    
113    else:
                    
114        soup = BeautifulSoup(html, "lxml")
                    
115        if soup.title is not None:
                    
                
release.py https://gitlab.com/LocutusOfPenguin/python-chess | Python | 178 lines
                    
141        sys.exit(1)
                    
142    soup = bs4.BeautifulSoup(res.text, "html.parser")
                    
143    csrf = soup.find("input", {"name": "CSRFToken"})["value"]
                    
                
types.py https://github.com/rxuriguera/bibtexIndexMaker.git | Python | 87 lines
                    
37    self.msg attribute contains explanation why parsing failed
                    
38    self.tag attribute contains BeautifulSoup object with the most relevant tag
                    
39    that failed to parse
                    
                
post.py https://github.com/mw44118/blogofile.git | Python | 309 lines
                    
22import logging
                    
23import BeautifulSoup
                    
24
                    
138             """Retrieve excerpt from article"""
                    
139             s = BeautifulSoup.BeautifulSoup(self.content)
                    
140             # get rid of javascript, noscript and css
                    
                
setup.py https://github.com/eged/django-blog-zinnia.git | Python | 37 lines
                    
33                        'akismet',
                    
34                        'BeautifulSoup',
                    
35                        ])
                    
                
inject.py https://gitlab.com/BoTranVan/MITMf | Python | 195 lines
                    
23
                    
24from bs4 import BeautifulSoup
                    
25from plugins.plugin import Plugin
                    
86    	    if encoding is not None:
                    
87                html = BeautifulSoup(data.decode(encoding, "ignore"), "lxml")
                    
88    	    else:
                    
88    	    else:
                    
89                html = BeautifulSoup(data, "lxml")
                    
90
                    
98                if self.html_payload:
                    
99                    payload = BeautifulSoup(self.html_payload, "html.parser")
                    
100                    html.body.append(payload)
                    
104                    with open(self.html_file, 'r') as file:
                    
105                        payload = BeautifulSoup(file.read(), "html.parser")
                    
106                        html.body.append(payload)
                    
                
share_post.py https://gitlab.com/janninematt/janninematt | Python | 81 lines
                    
8
                    
9from bs4 import BeautifulSoup
                    
10try:
                    
18def article_title(content):
                    
19    main_title = BeautifulSoup(content.title, 'html.parser').get_text().strip()
                    
20    sub_title = ''
                    
21    if hasattr(content, 'subtitle'):
                    
22        sub_title = ' ' + BeautifulSoup(content.subtitle, 'html.parser').get_text().strip()
                    
23    return quote(('%s%s' % (main_title, sub_title)).encode('utf-8'))
                    
31def article_summary(content):
                    
32    return quote(BeautifulSoup(content.summary, 'html.parser').get_text().strip().encode('utf-8'))
                    
33
                    
                
cablemodem_check.py https://gitlab.com/mikeos2/Nagios_Plugins | Python | 223 lines
                    
38try:
                    
39    from bs4 import BeautifulSoup
                    
40except ImportError:
                    
40except ImportError:
                    
41    print "Error: (" + str(Nagios_UNKNOWN) + ") install BeautifulSoup!"
                    
42    sys.exit(Nagios_UNKNOWN)
                    
69
                    
70        return BeautifulSoup(page)
                    
71
                    
                
subtitle-downloader.py https://gitlab.com/132nd-etcher/subtitle-downloader | Python | 124 lines
                    
18import requests,time,re,zipfile
                    
19from bs4 import BeautifulSoup
                    
20PY_VERSION = sys.version_info[0]
                    
74        r=requests.get("http://subscene.com/subtitles/release?q="+root);
                    
75        soup=BeautifulSoup(r.content,"lxml")
                    
76        atags=soup.find_all("a")
                    
83            r=requests.get("http://subscene.com"+href);
                    
84            soup=BeautifulSoup(r.content,"lxml")
                    
85            lin=soup.find_all('a',attrs={'id':'downloadButton'})[0].get("href")
                    
86            r=requests.get("http://subscene.com"+lin);
                    
87            soup=BeautifulSoup(r.content,"lxml")
                    
88            subfile=open(root2+".zip", 'wb')
                    
                
base.py https://github.com/tallstreet/jaikuenginepatch.git | Python | 209 lines
                    
17
                    
18from beautifulsoup import BeautifulSoup
                    
19
                    
193    self.assertWellformed(response)
                    
194    parsed = BeautifulSoup.BeautifulSoup(response.content)
                    
195    found = parsed.findAll('a', attrs = { 'class': link_class})
                    
                
get_legislation.py https://github.com/BRIMIL01/fiftystates.git | Python | 117 lines
                    
14    state = 'nc'
                    
15    soup_parser = html5lib.HTMLParser(tree=html5lib.treebuilders.getTreeBuilder('beautifulsoup')).parse
                    
16
                    
                
Weather.py https://gitlab.com/leiftomas/jasper-client | Python | 172 lines
                    
43                     'international_cities.asp')
                    
44    soup = bs4.BeautifulSoup(r.text)
                    
45    data = soup.find(id="inner-content").find('pre').string
                    
                
baseparser.py https://gitlab.com/andyblaesus/newsdiffs | Python | 156 lines
                    
52# Ick.
                    
53from BeautifulSoup import BeautifulSoup
                    
54def bs_fixed_getText(self, separator=u""):
                    
54def bs_fixed_getText(self, separator=u""):
                    
55    bsmod = sys.modules[BeautifulSoup.__module__]
                    
56    if not len(self.contents):
                    
65    return separator.join(strings)
                    
66sys.modules[BeautifulSoup.__module__].Tag.getText = bs_fixed_getText
                    
67# End fix
                    
111
                    
112    feeder_bs = BeautifulSoup #use this version of beautifulsoup for feed
                    
113
                    
                
fileops.py https://gitlab.com/j000sh/hackerrank-to-git | Python | 136 lines
                    
6from pprint import pprint
                    
7from bs4 import BeautifulSoup
                    
8from sh import git
                    
87    with open(filename, 'w') as f:
                    
88        f.write(BeautifulSoup(html, 'html5lib').prettify() + '\n')
                    
89    gitCommitModel(contest['model'], filename, 'contest created: ' + model['slug'])
                    
105    with open(filename, 'w') as f:
                    
106        f.write(BeautifulSoup(html, "html5lib").prettify() + "\n")
                    
107    gitCommitModel(challenge, filename, 'challenge created: ' + challenge['slug'])
                    
                
plugin.py https://github.com/lbjay/supybot-plugins.git | Python | 47 lines
                    
9
                    
10from BeautifulSoup import BeautifulSoup
                    
11
                    
23
                    
24    soup =  BeautifulSoup(doc)
                    
25    dd = soup.find('dd', 'highlight')
                    
                
wikipediaidevice.py https://github.com/RoDaniel/featurehouse.git | Python | 211 lines
                    
4import re
                    
5from exe.engine.beautifulsoup import BeautifulSoup
                    
6from exe.engine.idevice       import Idevice
                    
73        page = page.replace(u'&#160;', u'&nbsp;')
                    
74        soup = BeautifulSoup(page, False)
                    
75        content = soup.first('div', {'id': "content"})
                    
                
sitegen.py https://gitlab.com/Ivy001/pants | Python | 374 lines
                    
36  import bs4
                    
37  return bs4.BeautifulSoup(*args, **kwargs)
                    
38
                    
70def load_soups(config):
                    
71  """Generate BeautifulSoup AST for each page listed in config."""
                    
72  soups = {}
                    
                
phew.py https://gitlab.com/fnaticshank/crawler | Python | 327 lines
                    
13import requests
                    
14from bs4 import BeautifulSoup
                    
15
                    
213                        errors="replace")
                    
214                soup = BeautifulSoup(content, "lxml")
                    
215                tags = soup('a')
                    
                
test_pipreqs.py https://gitlab.com/Kravcenko/pipreqs | Python | 188 lines
                    
23                        'peewee', 'ujson', 'nonexistendmodule', 'bs4', 'after_method_is_valid_even_if_not_pep8' ]
                    
24        self.modules2 = ['beautifulsoup4']
                    
25        self.local = ["docopt", "requests", "nose", 'pyflakes']
                    
                
layouttestresults.py https://gitlab.com/x33n/phantomjs | Python | 91 lines
                    
31from webkitpy.common.net.resultsjsonparser import ResultsJSONParser
                    
32from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup, SoupStrainer
                    
33from webkitpy.layout_tests.models import test_results
                    
                
__init__.py https://github.com/oesmith/django-css.git | Python | 316 lines
                    
3import subprocess
                    
4from BeautifulSoup import BeautifulSoup
                    
5from tempfile import NamedTemporaryFile
                    
54        self.split_content = []
                    
55        self.soup = BeautifulSoup(self.content)
                    
56        self.xhtml = xhtml
                    
268                    basename = os.path.splitext(os.path.basename(filename))[0]
                    
269                    elem = BeautifulSoup(re.sub(basename+ext,basename+'.css',unicode(elem)))
                    
270                    filename = path + '.css'
                    
                
accelometer_metadata_creator.py https://gitlab.com/heavelock/metadata_creator | Python | 198 lines
                    
1from bs4 import BeautifulSoup
                    
2import scipy.io
                    
9def parse_event_names(file):
                    
10    soup = BeautifulSoup(open(file), 'lxml-xml')
                    
11    parsed_meta = {}
                    
                
testFunctional.py https://github.com/eaudeweb/Naaya.git | Python | 190 lines
                    
4from StringIO import StringIO
                    
5from BeautifulSoup import BeautifulSoup
                    
6from mock import patch
                    
180        html = self.browser.get_html()
                    
181        soup = BeautifulSoup(html)
                    
182
                    
                
shotchart_cbssports.py https://github.com/kpascual/nbascrape.git | Python | 127 lines
                    
6import logging
                    
7from BeautifulSoup import BeautifulSoup
                    
8from libscrape.config import constants
                    
                
Makefile https://github.com/freebsd/freebsd-ports.git | Makefile | 34 lines
                    
17		${PYTHON_PKGNAMEPREFIX}psutil>=2.0:sysutils/py-psutil@${PY_FLAVOR}
                    
18RUN_DEPENDS=	${PYTHON_PKGNAMEPREFIX}beautifulsoup>=4.2.1:www/py-beautifulsoup@${PY_FLAVOR} \
                    
19		${PYTHON_PKGNAMEPREFIX}importlib-metadata>0:devel/py-importlib-metadata@${PY_FLAVOR} \
                    
                
pingback.py https://github.com/aparo/django-blog-zinnia.git | Python | 141 lines
                    
17from zinnia.settings import PINGBACK_CONTENT_LENGTH
                    
18from BeautifulSoup import BeautifulSoup
                    
19from django_xmlrpc.decorators import xmlrpc_func
                    
94
                    
95        soup = BeautifulSoup(document)
                    
96        title = soup.find('title')
                    
                
1.4.9-0004-Fix-3608-Replace-discogs-client-with-python3-discogs.patch https://gitlab.com/redcore/portage | Patch | 134 lines
                    
71-    tests_require=[
                    
72-        'beautifulsoup4',
                    
73-        'flask',
                    
88+        'test': [
                    
89+            'beautifulsoup4',
                    
90+            'coverage',
                    
                
setup.py https://gitlab.com/simont3/awftp | Python | 117 lines
                    
91    # https://packaging.python.org/en/latest/technical.html#install-requires-vs-requirements-files
                    
92    install_requires=['requests>=2.13.0', 'click>=6.7', 'beautifulsoup4>=4.6.0'],
                    
93
                    
                
jitsimeetbridge.py https://gitlab.com/JigmeDatse/synapse | Python | 260 lines
                    
15import grequests
                    
16from BeautifulSoup import BeautifulSoup
                    
17import json
                    
144        resps = grequests.map([req])
                    
145        obj = BeautifulSoup(resps[0].content)
                    
146        return obj
                    
163        self.ssrcs = {}
                    
164        jingleSoup = BeautifulSoup(jingle)
                    
165        for cont in jingleSoup.iq.jingle.findAll('content'):
                    
                
adapter_twilightednet.py https://code.google.com/p/fanficdownloader/ | Python | 254 lines
                    
25
                    
26from .. import BeautifulSoup as bs
                    
27from ..htmlcleanup import stripHTML
                    
128        
                    
129        # use BeautifulSoup HTML parser to make everything easier to find.
                    
130        soup = bs.BeautifulSoup(data)
                    
214            
                    
215            # use BeautifulSoup HTML parser to make everything easier to find.
                    
216            seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url))
                    
                
olympic.py https://gitlab.com/camilo-celis/DB_SQuirreL | Python | 250 lines
                    
8import urllib2
                    
9from bs4 import BeautifulSoup
                    
10import csv
                    
31        #Create the soup object from the HTML data
                    
32        self.soup = BeautifulSoup(html_data)
                    
33
                    
40            url = "http://databaseolympics.com/games/gamesyear.htm?g=%s" % link['href'].split('=')[1]
                    
41            page = BeautifulSoup(urllib2.urlopen(url).read(), from_encoding="iso-8859-1")
                    
42
                    
56    def get_country_data(self, country_url):
                    
57        page = BeautifulSoup(urllib2.urlopen(country_url).read(), from_encoding="iso-8859-1")
                    
58
                    
83
                    
84            page = BeautifulSoup(urllib2.urlopen(url).read(), from_encoding="iso-8859-1")
                    
85
                    
                
download.py https://gitlab.com/qijungu/stock | Python | 271 lines
                    
3
                    
4from bs4 import BeautifulSoup
                    
5from datetime import datetime
                    
68            r = session.get(hurl)
                    
69            page = str(BeautifulSoup(r.content, 'lxml'))
                    
70            
                    
82            r = session.get(durl)
                    
83            page = BeautifulSoup(r.content, 'lxml').select_one('p')
                    
84            
                    
193            r = session.get(hurl)
                    
194            page = BeautifulSoup(r.content, 'lxml').select_one('p')
                    
195            data = page.text.strip()
                    
                
importer.py https://github.com/gregmalcolm/Bookie.git | Python | 201 lines
                    
2from datetime import datetime
                    
3from BeautifulSoup import BeautifulSoup
                    
4from bookie.models import BmarkMgr
                    
78
                    
79        soup = BeautifulSoup(file_io)
                    
80        can_handle = False
                    
90        """Given a file, process it"""
                    
91        soup = BeautifulSoup(self.file_handle)
                    
92
                    
140        """
                    
141        soup = BeautifulSoup(file_io)
                    
142        can_handle = False
                    
157        """
                    
158        soup = BeautifulSoup(self.file_handle)
                    
159        if not soup.contents[0] == "DOCTYPE NETSCAPE-Bookmark-file-1":
                    
                
nzbclub.py https://gitlab.com/132nd-etcher/CouchPotatoServer | Python | 100 lines
                    
2
                    
3from bs4 import BeautifulSoup
                    
4from couchpotato.core.helpers.encoding import toUnicode
                    
56        full_description = self.getCache('nzbclub.%s' % item['id'], item['detail_url'], cache_timeout = 25920000)
                    
57        html = BeautifulSoup(full_description)
                    
58        nfo_pre = html.find('pre', attrs = {'class': 'nfo'})
                    
                
redditstories.py https://github.com/tuxcanfly/django-socialnews.git | Python | 249 lines
                    
18import datetime
                    
19from BeautifulSoup import BeautifulSoup
                    
20
                    
71    stories = []
                    
72    soup = BeautifulSoup(content)
                    
73    entries = soup.findAll('div', id=re.compile('entry_.*'))
                    
187def _get_next_page(content):
                    
188    soup = BeautifulSoup(content)
                    
189    a = soup.find(lambda tag: tag.name == 'a' and tag.string == 'next')
                    
                
grade-interactive.py https://gitlab.com/rshipp/mines-grading | Python | 194 lines
                    
14# Everything should work with Python 2.7+, although I haven't explicitly tested
                    
15# it on 3+. The logging/debugging stuff makes use of the BeautifulSoup library
                    
16# for parsing HTML (http://www.crummy.com/software/BeautifulSoup), but it's not
                    
21import mechanize, getpass, re, time, zipfile, glob, os, subprocess, sys
                    
22from bs4 import BeautifulSoup
                    
23
                    
23
                    
24# Logging: Write BeautifulSoup HTML to file
                    
25def writeHTML(page, filename):
                    
68initialResponse = browser.open("https://cs.mcprogramming.com/djintro/entry/")
                    
69homeHTML = BeautifulSoup(initialResponse.get_data())
                    
70# writeHTML(homeHTML, 'home')
                    
71
                    
72# Find the login link using BeautifulSoup and follow it
                    
73loginLink = homeHTML.find('a', 'mymaillogin')
                    
                
udacity_crawler.py https://gitlab.com/fnaticshank/crawler | Python | 263 lines
                    
4import robotexclusionrulesparser as rerp
                    
5from bs4 import BeautifulSoup
                    
6from urlparse import urlparse, urljoin
                    
140		print "[get_page()] Page off limits!"
                    
141		return BeautifulSoup(""), ""
                    
142	if url in cache:
                    
147			content = urllib.urlopen(url).read()
                    
148			return BeautifulSoup(content), url
                    
149		except:
                    
149		except:
                    
150			return BeautifulSoup(""), ""
                    
151
                    
                
get_legislation.py https://github.com/rshapiro/fiftystates.git | Python | 184 lines
                    
3import re
                    
4from BeautifulSoup import BeautifulSoup
                    
5import datetime as dt
                    
50        self.log("Getting bill list for %s %s" % (chamber, session))
                    
51        bill_list = BeautifulSoup(self.urlopen(bill_list_url))
                    
52
                    
59            bill_info_url = "http://www.leg.state.vt.us" + bill_link['href']
                    
60            info_page = BeautifulSoup(self.urlopen(bill_info_url))
                    
61
                    
109        self.log("Getting bill list for %s %s" % (chamber, session))
                    
110        bill_list = BeautifulSoup(urllib2.urlopen(bill_list_url, data))
                    
111
                    
117
                    
118            info_page = BeautifulSoup(self.urlopen(
                    
119                    "http://www.leg.state.vt.us" + bill_link['href']))
                    
                
bsoupxpath.py https://github.com/jsmiller84/CouchPotato.git | Python | 394 lines
                    
3
                    
4This module provides XPath support for BeautifulSoup.
                    
5
                    
28import string
                    
29import _bsoup as BeautifulSoup
                    
30
                    
107            # for an absolute path, start from the root
                    
108            if not isinstance(node, BeautifulSoup.Tag) \
                    
109               or (node.name != '[document]'):
                    
180        """Parse the predicate. Return a callable that can be used to filter
                    
181        nodes. Update `self.soup_args` to take advantage of BeautifulSoup search
                    
182        features.
                    
268                last = node
                    
269                while (not isinstance(last, BeautifulSoup.NavigableString)) \
                    
270                      and (len(last.contents) > 0):
                    
                
__init__.py https://bitbucket.org/rattray/popcorn-portal.git | Python | 307 lines
                    
75
                    
76# The BeautifulSoup class will take feature lists from developers and use them
                    
77# to look up builders in this registry.
                    
                
amf_serializer.rb https://github.com/brownman/flexonrails.git | Ruby | 364 lines
                    
111         
                    
112        elsif (value.class.to_s == 'BeautifulSoup')
                    
113          write_xml(value.to_s)
                    
159          # I know we can combine this with the last condition, but don't  ; the Rexml and Beautiful Soup test is expensive, and for large record sets with many AR its better to be able to skip the next step
                    
160        elsif value.is_a?(ActiveRecord::Base) # Aryk: this way, we can bypass the "['REXML::Document', 'BeautifulSoup'].include?(value.class.to_s) " operation
                    
161          write_amf3_object(value)
                    
162      
                    
163        elsif ['REXML::Document', 'BeautifulSoup'].include?(value.class.to_s) 
                    
164          write_byte(AMF3_XML)
                    
                
convert_notebooks_to_html_partial.py https://gitlab.com/dibya/textbook-tools | Python | 152 lines
                    
134    """Return a html partial of divs with cell contents."""
                    
135    doc = bs4.BeautifulSoup(html, 'html5lib')
                    
136
                    
                
extraer_datos_composicion_alimentos.py https://gitlab.com/FoodUpProject/FoodUp | Python | 54 lines
                    
2import urllib2,unicodedata
                    
3from bs4 import BeautifulSoup
                    
4 
                    
7    html = conexion.read()
                    
8    soup = BeautifulSoup(html)
                    
9    #obtenemos una lista de String con la condiciĆ³n de atributos class con valores details y price
                    
                
sponsoredlinks.py https://gitlab.com/oytunistrator/PwnBerryPi | Python | 235 lines
                    
14from htmlentitydefs import name2codepoint
                    
15from BeautifulSoup import BeautifulSoup
                    
16
                    
30    self.msg attribute contains explanation why parsing failed
                    
31    self.tag attribute contains BeautifulSoup object with the most relevant tag that failed to parse
                    
32    Thrown only in debug mode
                    
158
                    
159        return BeautifulSoup(page)
                    
160
                    
                
default.py http://seppius-xbmc-repo.googlecode.com/svn/trunk/ | Python | 341 lines
                    
46    sys.path.append(os.path.join(Addon.getAddonInfo('path'), r'resources', r'lib'))
                    
47    from BeautifulSoup  import BeautifulSoup
                    
48except:
                    
50        sys.path.insert(0, os.path.join(Addon.getAddonInfo('path'), r'resources', r'lib'))
                    
51        from BeautifulSoup  import BeautifulSoup
                    
52    except:
                    
53        sys.path.append(os.path.join(os.getcwd(), r'resources', r'lib'))
                    
54        from BeautifulSoup  import BeautifulSoup
                    
55        icon = xbmc.translatePath(os.path.join(os.getcwd().replace(';', ''),'icon.png'))
                    
136    html = re.compile('<body>(.+?)<\/body>', re.MULTILINE|re.DOTALL).findall(html)[0]
                    
137    soup = BeautifulSoup(html)
                    
138
                    
162    html = re.compile('<body>(.+?)<\/body>', re.MULTILINE|re.DOTALL).findall(html)[0]
                    
163    soup = BeautifulSoup(html)
                    
164
                    
                
get_legislation.py https://github.com/BRIMIL01/fiftystates.git | Python | 118 lines
                    
4import re
                    
5from BeautifulSoup import BeautifulSoup
                    
6
                    
45        doc = response.read()
                    
46        soup = BeautifulSoup(doc)
                    
47
                    
                
browser.py https://gitlab.com/phyks/weboob | Python | 120 lines
                    
22from weboob.deprecated.browser.parsers.iparser import IParser
                    
23import BeautifulSoup
                    
24
                    
32    def parse(self, data, encoding=None):
                    
33        return BeautifulSoup.BeautifulSoup(data.read().decode(encoding or 'utf-8'), convertEntities=BeautifulSoup.BeautifulStoneSoup.ALL_ENTITIES)
                    
34
                    
                
bills.py https://github.com/runderwood/openstates.git | Python | 114 lines
                    
3import re
                    
4from BeautifulSoup import BeautifulSoup
                    
5
                    
45        with self.urlopen(search_url + '?' + params) as doc:
                    
46            soup = BeautifulSoup(doc)
                    
47
                    
                
upnp.py https://gitlab.com/balhau/pyutils | Python | 152 lines
                    
8import requests
                    
9from bs4 import BeautifulSoup
                    
10
                    
95
                    
96soup=BeautifulSoup(r.data)
                    
97
                    
135
                    
136ipxml=BeautifulSoup(r.text)
                    
137
                    
145
                    
146rhxml=BeautifulSoup(r.text)
                    
147
                    
                
testFunctional.py https://github.com/mihneasim/Naaya.git | Python | 208 lines
                    
23from StringIO import StringIO
                    
24from BeautifulSoup import BeautifulSoup
                    
25
                    
193        html = self.browser.get_html()
                    
194        soup = BeautifulSoup(html)
                    
195
                    
                
default.py http://seppius-xbmc-repo.googlecode.com/svn/trunk/ | Python | 259 lines
                    
31    sys.path.append(os.path.join(Addon.getAddonInfo('path'), r'resources', r'lib'))
                    
32    from BeautifulSoup  import BeautifulSoup
                    
33except:
                    
35        sys.path.insert(0, os.path.join(Addon.getAddonInfo('path'), r'resources', r'lib'))
                    
36        from BeautifulSoup  import BeautifulSoup
                    
37    except:
                    
38        sys.path.append(os.path.join(os.getcwd(), r'resources', r'lib'))
                    
39        from BeautifulSoup  import BeautifulSoup
                    
40        icon = xbmc.translatePath(os.path.join(os.getcwd().replace(';', ''),'icon.png'))
                    
77
                    
78    soup = BeautifulSoup(html, fromEncoding="windows-1251")
                    
79
                    
119
                    
120    soup = BeautifulSoup(html, fromEncoding="windows-1251")
                    
121
                    
                
rfc822.py https://gitlab.com/wilane/superdesk | Python | 264 lines
                    
23from superdesk.errors import IngestEmailError
                    
24from bs4 import BeautifulSoup, Comment, Doctype
                    
25import re
                    
218        try:
                    
219            # BeautifulSoup is catching out-of-order and unclosed tags, so markup
                    
220            # can't leak out of comments and break the rest of the page.
                    
220            # can't leak out of comments and break the rest of the page.
                    
221            soup = BeautifulSoup(html)
                    
222        except Exception as e:
                    
                
simple_dehasher.py https://gitlab.com/8wiw/python-dehasher | Python | 165 lines
                    
16    import time
                    
17    from bs4 import BeautifulSoup
                    
18    from colorama import Fore, Style
                    
130    PAGE = requests.get(URL, headers=HEADERS)  # Uses requests lib to get the content of the page
                    
131    PAGE_CONTENT = BeautifulSoup(PAGE.content, "html.parser").get_text()
                    
132#--------------------------------------------------------------------------------------------------
                    
138    PAGE = requests.get(URL, headers=HEADERS)  # Uses requests lib to get the content of the page
                    
139    PAGE_CONTENT = BeautifulSoup(PAGE.content, "html.parser").get_text()
                    
140    if "ERROR CODE : 001" in PAGE_CONTENT:
                    
                
TextRank.py https://bitbucket.org/arka7z/information-retrieval.git | Python | 271 lines
                    
16from functools import reduce
                    
17from bs4 import BeautifulSoup
                    
18from nltk.tokenize.punkt import PunktSentenceTokenizer
                    
78        file_content=f.read()
                    
79        soup = BeautifulSoup(file_content, "lxml")
                    
80        text_group = soup.get_text()
                    
                
scrape.py https://gitlab.com/mkhouri/news_scraper | Python | 72 lines
                    
1import re
                    
2from bs4 import BeautifulSoup
                    
3from urllib.parse import urlparse
                    
7def parse(url, pageHtml, bodyLines):
                    
8    soup = BeautifulSoup(pageHtml, "lxml")
                    
9    host = urlparse(url).hostname
                    
                
4chan_downloader.py https://gitlab.com/8wiw/4chan_downloader | Python | 147 lines
                    
7	import requests, time, sys, os
                    
8	from bs4 import BeautifulSoup
                    
9	from colorama import Fore, Style
                    
91				log_user_stop(board)
                    
92			souped = BeautifulSoup(r.text, 'html.parser')
                    
93			img_tags = souped.find_all('img')
                    
                
adapter_adastrafanficcom.py https://code.google.com/p/fanficdownloader/ | Python | 239 lines
                    
25
                    
26from .. import BeautifulSoup as bs
                    
27from ..htmlcleanup import stripHTML
                    
95        
                    
96        # use BeautifulSoup HTML parser to make everything easier to find.
                    
97        soup = bs.BeautifulSoup(data)
                    
199
                    
200            # use BeautifulSoup HTML parser to make everything easier to find.
                    
201            seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url))
                    
                
hulu.py https://github.com/barttenbrinke/Bartsidee-Repository.git | Python | 124 lines
                    
3import ba, md5, time, base64
                    
4from beautifulsoup.BeautifulSoup import BeautifulSoup
                    
5from urllib import quote_plus
                    
24        data = data.replace('\\u003c','<').replace('\\u003e','>').replace('\\','').replace('\\n','').replace('\\t','')
                    
25        soup = BeautifulSoup(data, convertEntities="xml", smartQuotesTo="xml")
                    
26
                    
42
                    
43        soup = BeautifulSoup(data, convertEntities="xml", smartQuotesTo="xml")
                    
44        totalpage = len(soup.findAll('tr', 'srh'))
                    
64        data = data.replace('\\u003c','<').replace('\\u003e','>').replace('\\','')
                    
65        soup = BeautifulSoup(data)
                    
66
                    
                
utils.py https://github.com/theinterned/batucada.git | Python | 116 lines
                    
9
                    
10from BeautifulSoup import BeautifulSoup
                    
11
                    
70    """
                    
71    soup = BeautifulSoup(content)
                    
72    links = soup.findAll('link')
                    
73
                    
74    # BeautifulSoup instances are not actually dictionaries, so
                    
75    # we can't use the more proper 'key in dict' syntax and
                    
                
 

Source

Language