100+ results for 'beautifulsoup' (187 ms)
  11  from PIL import ImageDraw
  12  from bs4 import BeautifulSoup
  93  raw_html = self.browser.response().read()
  94  soup = BeautifulSoup(raw_html)
 149  mechanize object.'''
 150  soup = BeautifulSoup(self.response)
 151  formcount=0
 165  mechanize object.'''
 166  soup = BeautifulSoup(self.response)
 167  formcount=0
 180  ''' Displays all the links from the current browser page.'''
 181  soup = BeautifulSoup(raw_html)
 182  categories_table_obj = soup.find("table", attrs={"id":'my_table'})

google_soup.py https://github.com/chudler/Community-Zenpacks.git | Python | 221 lines
  12  import logging.handlers
  13  from BeautifulSoup import BeautifulSoup
  14  from ClientForm import *
  83  def findApplications(self):
  84  main_apps = BeautifulSoup(self.resetClient())
  85  column_headers = []
  87  for table in main_apps.findAll('table', limit=1):
  88  # table is a BeautifulSoup.Tag object
  89  column_headers = self.extract_headings(table)
 148  quota_details = re.sub(quota_fix, '', quota_details)
 149  quota_soup = BeautifulSoup(quota_details)
 150  quota_section = quota_soup.find(attrs={'id':'ae-quota-details'})
 168  app_main = open('/tmp/dashboard.html', 'r').read()
 169  app_soup = BeautifulSoup(app_main)
 170  load_section = app_soup.find(text=re.compile('Current Load'))

util.py https://github.com/sunlightlabs/muni_words.git | Python | 198 lines
   8  from django.contrib.gis.geos import Point
   9  from BeautifulSoup import BeautifulSoup, BeautifulStoneSoup
  10  from excludes import EXCLUDED
  40  def strip_html(string):
  41  return ''.join([e for e in BeautifulSoup(string).recursiveChildGenerator() if isinstance(e, unicode)]).replace(' ', ' ')

testFunctional.py https://github.com/bogtan/Naaya.git | Python | 213 lines
  21  from unittest import TestSuite, makeSuite
  22  from BeautifulSoup import BeautifulSoup
 148  html = self.browser.get_html()
 149  soup = BeautifulSoup(html)

_htmlparser.py https://github.com/yoheia/yoheia.git | Python | 265 lines
  54  class BeautifulSoupHTMLParser(HTMLParser):
  55  def handle_starttag(self, name, attrs):
 163  args, kwargs = self.parser_args
 164  parser = BeautifulSoupHTMLParser(*args, **kwargs)
 165  parser.soup = self.soup
 169  warnings.warn(RuntimeWarning(
 170  "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
 171  raise e
 198  """, re.VERBOSE)
 199  BeautifulSoupHTMLParser.locatestarttagend = locatestarttagend
 262  BeautifulSoupHTMLParser.parse_starttag = parse_starttag
 263  BeautifulSoupHTMLParser.set_cdata_mode = set_cdata_mode
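The warning quoted above from _htmlparser.py names the usual fix: point Beautiful Soup at an external parser. A minimal sketch of that parser-selection pattern (assumes only beautifulsoup4 itself; lxml is optional and used when present):

    from bs4 import BeautifulSoup, FeatureNotFound

    html = "<p>Hello<br>world"

    try:
        # lxml is the fast, lenient backend the warning recommends.
        soup = BeautifulSoup(html, "lxml")
    except FeatureNotFound:
        # Fall back to the stdlib parser when lxml is not installed.
        soup = BeautifulSoup(html, "html.parser")

    print(soup.p.get_text())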
main.py https://github.com/gtracy/APODEmail.git | Python | 249 lines
  19  from BeautifulSoup import BeautifulSoup, Tag
 185  soup = BeautifulSoup(result.content)
 186  #logging.debug(soup)

TV3Scrapper.py http://xbmc-vodie.googlecode.com/svn/trunk/ | Python | 254 lines
   9  import sys
  10  from BeautifulSoup import SoupStrainer, MinimalSoup as BeautifulSoup, BeautifulStoneSoup
  11  import urllib, urllib2

TVSeriesUtil.py http://xbmc-vodie.googlecode.com/svn/trunk/ | Python | 224 lines
   9  import sys
  10  from BeautifulSoup import SoupStrainer, MinimalSoup as BeautifulSoup, BeautifulStoneSoup
  11  import urllib, urllib2, cookielib

hackerrankops.py https://gitlab.com/j000sh/hackerrank-to-git | Python | 200 lines
   9  import logging # TODO get rid of these print statements!
  10  from bs4 import BeautifulSoup
 173  return
 174  csrfHtml = BeautifulSoup(r.text, 'html.parser').find(id = 'csrf-token')
 175  if csrfHtml:

diagnose.py https://gitlab.com/Rheinhart/csuchen-Guard | Python | 216 lines
   8  import bs4
   9  from bs4 import BeautifulSoup, __version__
  10  from bs4.builder import builder_registry
  69  try:
  70  soup = BeautifulSoup(data, parser)
  71  success = True
 178  a = time.time()
 179  soup = BeautifulSoup(data, parser)
 180  b = time.time()
 207  vars = dict(bs4=bs4, data=data, parser=parser)
 208  cProfile.runctx('bs4.BeautifulSoup(data, parser)' , vars, vars, filename)

default.py http://seppius-xbmc-repo.googlecode.com/svn/trunk/ | Python | 406 lines
  33  from BeautifulSoup import BeautifulSoup, BeautifulStoneSoup
  34  import socket
 109  beautifulSoup = BeautifulSoup(http)
 110  userPanel = beautifulSoup.find('a', {"id": "loginlink"})
 172  beautifulSoup = BeautifulSoup(http)
 173  content = beautifulSoup.find('div', attrs={'id': 'dle-content'})
 244  beautifulSoup = BeautifulSoup(http)
 245  categoryContainer = beautifulSoup.find('ul', 'cats')
 274  beautifulSoup = BeautifulSoup(http)
 275  tagsContainer = beautifulSoup.find('td', 'news')

jobs.py git://pkgs.fedoraproject.org/sugar-read | Python | 310 lines
  26  import os.path
  27  import BeautifulSoup
  71  def _searchfile(self, fileobj):
  72  soup = BeautifulSoup.BeautifulSoup(fileobj)
  73  body = soup.find('body')

test_converter_unittest.py https://gitlab.com/x33n/phantomjs | Python | 319 lines
  34  from webkitpy.common.system.outputcapture import OutputCapture
  35  from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup
  36  from webkitpy.w3c.test_converter import W3CTestConverter
 186  doc = BeautifulSoup(test_html)
 187  oc = OutputCapture()
 266  try:
 267  converted = converter.convert_prefixed_properties(BeautifulSoup(test_content[1]), DUMMY_FILENAME)
 268  finally:
 281  if isinstance(converted, basestring):
 282  converted = BeautifulSoup(converted)

mastodon.scm https://gitlab.com/daym/guix | Scheme | 174 lines
  60  (inputs
  61  `(("python-beautifulsoup4" ,python-beautifulsoup4)
  62  ("python-requests" ,python-requests)

browser.py https://bitbucket.org/chahaojia/eve_moniter.git | Python | 236 lines
  87  import BeautifulSoup
  88  return BeautifulSoup.BeautifulSoup(self.data)

get_legislation.py https://github.com/jdunck/fiftystates.git | Python | 112 lines
   4  import datetime as dt
   5  from BeautifulSoup import BeautifulSoup
  19  # Get the details page and parse it with BeautifulSoup. These
  20  # pages contain a malformed 'p' tag that (certain versions of)
  23  details_raw = details_raw.replace('<P ALIGN=CENTER">', '')
  24  details = BeautifulSoup(details_raw)
  26  # Get the history page (following a link from the details page).
  27  # Once again, we remove tags that BeautifulSoup chokes on
  28  # (including all meta tags, because bills with quotation marks
  35  history_raw = rem_meta.sub('</title></head>', history_raw)
  36  history = BeautifulSoup(history_raw)
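The get_legislation.py comments above describe a clean-then-parse routine: strip markup the parser chokes on, then build the soup. A sketch of the same idea in bs4 terms; the rem_meta pattern here is a hypothetical stand-in, since the original regex is not shown in the excerpt:

    import re
    from bs4 import BeautifulSoup

    def parse_details(details_raw):
        # Remove the malformed tag first, exactly as the scraper does.
        details_raw = details_raw.replace('<P ALIGN=CENTER">', '')
        return BeautifulSoup(details_raw, 'html.parser')

    def parse_history(history_raw):
        # Hypothetical rem_meta: collapse everything between </title> and
        # </head> (meta tags included), matching the sub() shown above.
        rem_meta = re.compile(r'</title>.*</head>', re.DOTALL | re.IGNORECASE)
        history_raw = rem_meta.sub('</title></head>', history_raw)
        return BeautifulSoup(history_raw, 'html.parser')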
23 "from pattern import web\n", 24 "from BeautifulSoup import BeautifulSoup" 25 ], 261 "source": [ 262 "# Using BeautifulSoup" 263 ] 268 "input": [ 269 "bs = BeautifulSoup(r.text)\n", 270 "for movie in bs.findAll('td', 'title'):\n",ford.rb https://gitlab.com/0072016/homebrew-core | Ruby | 153 lines
25 26 resource "beautifulsoup4" do 27 url "https://pypi.python.org/packages/26/79/ef9a8bcbec5abc4c618a80737b44b56f1cb393b40238574078c5002b97ce/beautifulsoup4-4.4.1.tar.gz" 67 ENV.prepend_create_path "PYTHONPATH", libexec/"vendor/lib/python2.7/site-packages" 68 deps = %w[beautifulsoup4 graphviz Jinja2 Markdown markdown-include MarkupSafe Pygments toposort] 69 deps << "lxml" if build.with? "lxml"examples.py https://github.com/towerjoo/django-test-extensions.git | Python | 112 lines
103 def test_using_beautiful_soup(self): 104 "Example test for content on a given view, this time using the BeautifulSoup parser" 105 response = self.client.get('/example/') 105 response = self.client.get('/example/') 106 soup = BeautifulSoup(response.content) 107 self.assert_equal("Page Title", soup.find("title").string.strip())feed.py https://bitbucket.org/milos07p/pypsd-nao-on-git.git | Python | 194 lines
6from BaseHTTPServer import BaseHTTPRequestHandler 7from BeautifulSoup import BeautifulSoup 8from decimal import Decimal 92 def get_soup(self): 93 return BeautifulSoup(self._html, convertEntities=BeautifulSoup.HTML_ENTITIES) 94mangafox.py https://bitbucket.org/antoinealb/mangafox.py.git | Python | 150 lines
8import requests 9from bs4 import BeautifulSoup 10import os 34 """ 35 Returns a BeautifulSoup instance made with the HTML of the page at url. 36 """ 37 page = requests.get(url) 38 return BeautifulSoup(page.text) 39lxml.html.ElementSoup-module.html https://github.com/jcrobak/hue.git | HTML | 278 lines
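The mangafox.py helper above is the fetch-and-parse pattern that recurs throughout these results. A self-contained sketch, with two small hardening tweaks (an explicit parser name and an HTTP status check) that the excerpt itself does not show:

    import requests
    from bs4 import BeautifulSoup

    def get_soup(url):
        """Return a BeautifulSoup instance for the HTML of the page at url."""
        page = requests.get(url)
        page.raise_for_status()  # fail loudly rather than parse an error page
        return BeautifulSoup(page.text, "html.parser")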
lxml.html.ElementSoup-module.html https://github.com/jcrobak/hue.git | HTML | 278 lines
  64  <h1 class="epydoc">Module ElementSoup</h1><p class="nomargin-top"><span class="codelink"><a href="lxml.html.ElementSoup-pysrc.html">source code</a></span></p>
  65  Legacy interface to the BeautifulSoup HTML parser.
  91  <span class="summary-sig-arg">makeelement</span>=<span class="summary-sig-default">None</span>)</span><br />
  92  Convert a BeautifulSoup tree to a list of Element trees.</td>
  93  <td align="right" valign="top">
 108  <td><span class="summary-sig"><a name="parse"></a><span class="summary-sig-name">parse</span>(<span class="summary-sig-arg">file</span>,
 109  <span class="summary-sig-arg">beautifulsoup</span>=<span class="summary-sig-default">None</span>,
 110  <span class="summary-sig-arg">makeelement</span>=<span class="summary-sig-default">None</span>)</span></td>
 141  </td><td class="summary">
 142  <a href="lxml.html.ElementSoup-module.html#__doc__" class="summary-name" onclick="show_private();">__doc__</a> = <code title=""""Legacy interface to the BeautifulSoup HTML parser.
 143  """">"""Legacy interface to the BeautifulSoup HTML pars<code class="variable-ellipsis">...</code></code>
 180  <p>Convert a BeautifulSoup tree to a list of Element trees.</p>
 181  <p>Returns a list instead of a single root Element to support
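ElementSoup is the legacy bridge between the two libraries; its counterpart in these results is lxml.html.soupparser, which drives BeautifulSoup and hands back lxml Elements. A minimal sketch (needs both lxml and BeautifulSoup installed):

    from lxml.html import soupparser

    # fromstring() parses via BeautifulSoup and returns an lxml Element,
    # so broken markup can still be queried with XPath.
    root = soupparser.fromstring("<p>Hello <b>world")
    print(root.findtext('.//b'))  # -> world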
25 "url = \"http://www.fftoday.com/nfl/schedule.php\"\n", 26 "soup = bs4.BeautifulSoup(requests.get(url).text)\n", 27 "df = pandas.read_html(str(soup.find_all('table')[8]))[0]\n",bills.py https://github.com/runderwood/openstates.git | Python | 293 lines
3 4from BeautifulSoup import BeautifulSoup 5 33 with self.urlopen(index_file) as doc: 34 soup = BeautifulSoup(cleanup_html(doc)) 35 123 with self.urlopen(url) as doc: 124 soup = BeautifulSoup(doc) 125 date=Noneget_manmankan_images.py https://github.com/mitnk/stuff.git | Python | 143 lines
12 13from BeautifulSoup import BeautifulSoup 14 33 page = urllib2.urlopen(url) 34 soup = BeautifulSoup(page, fromEncoding="gb18030") 35 print u"Reading information of %s ..." % soup.findAll("h1")[0].string 58 page = urllib2.urlopen(url) 59 soup = BeautifulSoup(page) 60 javascripts = soup.findAll(text=lambda text: text.parent.name == "script")Makefile https://gitlab.com/lokiexinferis/vim-configs | Makefile | 82 lines
78 virtualenv build/html2vimdoc 79 build/html2vimdoc/bin/pip install beautifulsoup coloredlogs==4.0 markdown 80tracker.py https://gitlab.com/jan.raddatz/myimmitracker-analyzer | Python | 163 lines
3#pip install --proxy proxy:8080 bs4 4from bs4 import BeautifulSoup 5# pip install --proxy proxy:8080 ansicolors 80# r = requests.get(hostname + url_to_scrape, proxies=proxyDict) 81 soup = BeautifulSoup(r.text, 'html.parser') 82 all_tables = soup.find_all('table')module-tree.html https://github.com/jcrobak/hue.git | HTML | 170 lines
73 <ul> 74 <li> <strong class="uidlink"><a href="lxml.html.ElementSoup-module.html">lxml.html.ElementSoup</a></strong>: <em class="summary">Legacy interface to the BeautifulSoup HTML parser.</em> </li> 75 <li class="private"> <strong class="uidlink">lxml.html._dictmixin</strong> </li> 82 <li> <strong class="uidlink"><a href="lxml.html.html5parser-module.html">lxml.html.html5parser</a></strong>: <em class="summary">An interface to html5lib.</em> </li> 83 <li> <strong class="uidlink"><a href="lxml.html.soupparser-module.html">lxml.html.soupparser</a></strong>: <em class="summary">External interface to the BeautifulSoup HTML parser.</em> </li> 84 <li> <strong class="uidlink"><a href="lxml.html.usedoctest-module.html">lxml.html.usedoctest</a></strong>: <em class="summary">Doctest module for HTML comparison.</em> </li>Makefile https://github.com/freebsd/freebsd-ports.git | Makefile | 90 lines
49EXCEL_DESC= MS Excel I/O Add-ons 50HTML5LIB_DESC= Parse HTML with www/py-html5lib and www/py-beautifulsoup 51HTML_DESC= HTML Parsing/Generation Add-ons 52JINJA2_DESC= Support conditional HTML formatting with devel/py-Jinja2 53LXML_DESC= Parse HTML with devel/py-lxml and www/py-beautifulsoup 54MPL_DESC= Support graphical plotting output via math/py-matplotlib 72BTLNCK_RUN_DEPENDS= ${PYTHON_PKGNAMEPREFIX}bottleneck>=1.2.0:math/py-bottleneck@${PY_FLAVOR} 73HTML5LIB_RUN_DEPENDS= ${PYTHON_PKGNAMEPREFIX}beautifulsoup>=4.2.1:www/py-beautifulsoup@${PY_FLAVOR} \ 74 ${PYTHON_PKGNAMEPREFIX}html5lib>0:www/py-html5lib@${PY_FLAVOR} 75JINJA2_RUN_DEPENDS= ${PYTHON_PKGNAMEPREFIX}Jinja2>0:devel/py-Jinja2@${PY_FLAVOR} 76LXML_RUN_DEPENDS= ${PYTHON_PKGNAMEPREFIX}beautifulsoup>=4.2.1:www/py-beautifulsoup@${PY_FLAVOR} \ 77 ${PYTHON_PKGNAMEPREFIX}lxml>0:devel/py-lxml@${PY_FLAVOR}Parser.py https://gitlab.com/Fremis/IRCourse | Python | 276 lines
7 8from bs4 import BeautifulSoup, NavigableString 9import gc 74 try: 75 soup = BeautifulSoup(open_file, 'lxml') 76 except UnicodeDecodeError:ParseHtmlfromFile.py https://github.com/PuercoPop/EleccionesPeru.git | Python | 85 lines
3 4from BeautifulSoup import BeautifulSoup 5import pdb 16 17soup = BeautifulSoup( f_handle ) 18a = soup.findAll('tr',height="40") 23for item in soup.findAll('tr'): 24 for item2 in BeautifulSoup(str(item)).findAll('span',{'class':'arial_contenido_negrita'}): 25 if T_Flag == True: 36 37 for item2 in BeautifulSoup(str(item)).findAll('span',{'class':'arial_contenido'}): 38 if T_Flag == True: 79 #print item.contents 80#b = BeautifulSoup.BeautifulSoup(str(a)) 81#c = BeautifulSoup.BeautifulSoup( str( b.find('td',align="left" ) ) )testFunctional.py https://github.com/bogtan/Naaya.git | Python | 221 lines
22from copy import deepcopy 23from BeautifulSoup import BeautifulSoup 24 153 html = self.browser.get_html() 154 soup = BeautifulSoup(html) 155primewire.py https://bitbucket.org/Leia18/gmc.git | Python | 207 lines
5 6from BeautifulSoup import BeautifulSoup 7from universalscrapers import proxy 28 try: 29 html = BeautifulSoup(self.get_html(title, self.moviesearch_link)) 30 index_items = html.findAll('div', attrs={'class': 'index_item index_item_ie'}) 68 try: 69 html = BeautifulSoup(self.get_html(title, self.tvsearch_link)) 70 index_items = html.findAll('div', attrs={'class': re.compile('index_item.+?')}) 89 show_url = urlparse.urljoin(self.base_link, href) 90 html = BeautifulSoup(proxy.get(show_url, 'tv_episode_item')) 91 123 html = proxy.get(url, 'searchform') 124 parsed_html = BeautifulSoup(html) 125 key = parsed_html.findAll('input', attrs={'name': 'key'})[0]["value"]citotron.py https://gitlab.com/maxigas/citotron.git | Python | 265 lines
8from args import args 9from bs4 import BeautifulSoup as bs 10from collections import Counter as counterviews.py https://github.com/dotKom/studlan.git | Python | 310 lines
4 5from bs4 import BeautifulSoup 6 77 78 dom = BeautifulSoup(seating.layout.template, 'html.parser') 79 seat_counter = 0threading_url.py https://bitbucket.org/cheng123/mytools.git | Python | 114 lines
3import time,math,os,re,urllib,urllib2,cookielib 4from BeautifulSoup import BeautifulSoup 5import timecrawler.py https://bitbucket.org/Meister17/wiki-posting-list.git | Python | 120 lines
6import requests 7import BeautifulSoup as BS 8import nltk 46 links = [] 47 soup = BS.BeautifulSoup(request.text) 48 content = nltk.clean_html(request.text.encode('utf8'))HelpIndex.py https://github.com/esitarski/CrossMgr.git | Python | 98 lines
9import re 10from bs4 import BeautifulSoup 11 45 for f in glob.iglob( os.path.join(htmlDocDir, '*.html') ): 46 doc = BeautifulSoup( open(f).read(), 'html.parser' ) 47 div = doc.find('div', class_='content')__init__.py https://github.com/theduke/sehistory.git | Python | 238 lines
2 3from libraries.BeautifulSoup import BeautifulSoup 4 152 def extractLogo(self, html): 153 soup = BeautifulSoup(html) 154__init__.py https://github.com/junalmeida/Sick-Beard.git | Python | 355 lines
3"The Screen-Scraper's Friend" 4http://www.crummy.com/software/BeautifulSoup/ 5 15documentation: 16http://www.crummy.com/software/BeautifulSoup/bs4/doc/ 17""" 23 24__all__ = ['BeautifulSoup'] 25 48 49class BeautifulSoup(Tag): 50 """ 91 "BS4 does not respect the convertEntities argument to the " 92 "BeautifulSoup constructor. Entities are always converted " 93 "to Unicode characters.")soup.py https://github.com/mozilla/affiliates-lib.git | Python | 228 lines
2 3warnings.warn("BeautifulSoup 3.x (as of 3.1) is not fully compatible with html5lib and support will be removed in the future", DeprecationWarning) 4 4 5from BeautifulSoup import BeautifulSoup, Tag, NavigableString, Comment, Declaration 6 140 if namespaceHTMLElements: 141 warnings.warn("BeautifulSoup cannot represent elements in any namespace", DataLossWarning) 142 _base.TreeBuilder.__init__(self, namespaceHTMLElements) 144 def documentClass(self): 145 self.soup = BeautifulSoup("") 146 return Element(self.soup, self.soup, None) 162 if namespace is not None: 163 warnings.warn("BeautifulSoup cannot represent elements in any namespace", DataLossWarning) 164 return Element(Tag(self.soup, name), self.soup, namespace)generate_featured_pages.py https://github.com/greeness/trendingtopics.git | Python | 153 lines
13import urllib2 14from BeautifulSoup import BeautifulSoup 15import datetime 65 page = opener.open( url ).read() 66 soup = BeautifulSoup(page) 67 return soupfilter.py https://gitlab.com/cobhuni/hadith_alislam_extractor | Python | 197 lines
31from argparse import ArgumentParser 32from bs4 import BeautifulSoup 33import multiprocessing as mp 68 Args: 69 t (class 'bs4.BeautifulSoup'): html to parse 70 fn (str): filename, to trace errors. 72 Returns: 73 class 'bs4.BeautifulSoup': cell containing the text 74 117 with open(os.path.join(args.input_dir, fname)) as inf: 118 soup = BeautifulSoup(inf.read(),'lxml') 119_html5lib.py https://bitbucket.org/apyhtri/irc-bot1.git | Python | 222 lines
82 def fragmentClass(self): 83 self.soup = BeautifulSoup("") 84 self.soup.name = "[document_fragment]"wiki_semantic.py https://bitbucket.org/shishirk/geopy.git | Python | 108 lines
7try: 8 from BeautifulSoup import BeautifulSoup 9except ImportError: 9except ImportError: 10 util.logger.warn("BeautifulSoup was not found. " \ 11 "The SemanticMediaWiki geocoder will not work.") 31 """Parse the URL of the RDF link from the <head> of ``page``.""" 32 soup = BeautifulSoup(page) 33 link = soup.head.find('link', rel='alternate', type=mime_type) 81 page = urlopen(url) 82 soup = BeautifulSoup(page) 83PYopLib.py https://bitbucket.org/y0no/pyopmail.git | Python | 78 lines
4from os.path import join 5from bs4 import BeautifulSoup as bs4 6from bs4 import Commentrequirements_txt_linker_spec.rb https://gitlab.com/YarNayar/gitlab-ce | Ruby | 95 lines
28 nose-cov 29 beautifulsoup4 30 # 70 expect(subject).to include(link('nose-cov', 'https://pypi.python.org/pypi/nose-cov')) 71 expect(subject).to include(link('beautifulsoup4', 'https://pypi.python.org/pypi/beautifulsoup4')) 72 expect(subject).to include(link('docopt', 'https://pypi.python.org/pypi/docopt'))ieo.py https://gitlab.com/rithvikvibhu/batch-sof | Python | 71 lines
3import pprint 4from bs4 import BeautifulSoup 5 47 48 soup = BeautifulSoup(r.text, "html5lib") # Soup up html 49 table_data = [[cell.text for cell in row("td")] 49 table_data = [[cell.text for cell in row("td")] 50 for row in BeautifulSoup(r.text, "html5lib")("tr")] 51prototype_to_cix.py https://gitlab.com/Smileyt/KomodoEdit | Python | 295 lines
48Requirements: 49 * BeautifulSoup (http://www.crummy.com/software/BeautifulSoup/) 50 * cElementTree (http://effbot.org/downloads/#cElementTree) 65 66from BeautifulSoup import BeautifulSoup, NavigableString 67 260 data = getPrototypeDocsFromWebpage() 261 soup = BeautifulSoup(data) 262 cix_root = createCixRoot(name="Prototype", description="JavaScript framework for web development")legacy.py https://github.com/jlongman/xbmc-hockeystreams-plugin.git | Python | 174 lines
3 4from BeautifulSoup import BeautifulSoup 5import xbmcplugin, xbmcaddon, xbmcguisettings.py https://github.com/knabar/fynbos.git | Python | 64 lines
50# the backend to use when parsing the JavaScript or Stylesheet files 51PARSER = getattr(settings, 'COMPRESS_PARSER', 'compressor.parser.BeautifulSoupParser') 52test_archives.py https://gitlab.com/Acidburn0zzz/hyperkitty | Python | 341 lines
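The fynbos settings.py line above is django-compressor's parser hook. A sketch of the corresponding project setting; the dotted path is the fallback value shown in the snippet itself:

    # settings.py
    COMPRESS_PARSER = 'compressor.parser.BeautifulSoupParser'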
test_archives.py https://gitlab.com/Acidburn0zzz/hyperkitty | Python | 341 lines
  33  from mock import Mock
  34  from bs4 import BeautifulSoup
  35  from django.contrib.auth.models import User
 317  """
 318  soup = BeautifulSoup(html, "html.parser")
 319  months_list = soup.find(id="months-list")

run.py https://bitbucket.org/skywalking/loginparttimesystem.git | Python | 118 lines
   4  import cookielib, optparse, setting, urllib, urllib2, sys
   5  from BeautifulSoup import BeautifulSoup
   6  from datetime import datetime
  28  info = ()
  29  bs = BeautifulSoup(content).findAll('tr')[project]
  30  v = bs.findAll('td')
  35  def parse_signout(content):
  36  bs = BeautifulSoup(content).find('div', {'id': 'body'})
  37  if bs.text == '您沒有簽到記錄,無法進行簽退 ....':
  40  info = ()
  41  v = BeautifulSoup(content).findAll('td')
  42  k = BeautifulSoup(content).find('input', {'name': 'signout'})

toc.py https://gitlab.com/janninematt/janninematt | Python | 145 lines
  13  from bs4 import BeautifulSoup, Comment
 119  tree = node = HtmlTreeNode(None, title, 'h0', '')
 120  soup = BeautifulSoup(content._content, 'html.parser')
 121  settoc = False
 137  tree_string = '{}'.format(tree)
 138  tree_soup = BeautifulSoup(tree_string, 'html.parser')
 139  content.toc = tree_soup.decode(formatter='html')

get_legislation.py https://github.com/BRIMIL01/fiftystates.git | Python | 221 lines
  15  soup_parser = html5lib.HTMLParser(
  16  tree=html5lib.treebuilders.getTreeBuilder('beautifulsoup')).parse

build.py https://gitlab.com/imbest91/grapejuice | Python | 537 lines
  16  import yaml
  17  from bs4 import BeautifulSoup
  18  from jinja2 import Environment, FileSystemLoader, select_autoescape
 262  md_soup = BeautifulSoup(rendered_markdown, "lxml")
 263  summarizer = Summarizer(break_pads=["[summary-snip]"])
 358  soup = BeautifulSoup(content, "html5lib")

default.py https://gitlab.com/billyprice1/husham.com | Python | 280 lines
  10  import requests
  11  from BeautifulSoup import BeautifulSoup as bs
  12  from utils.webutils import *

main.py https://gitlab.com/smidaharoun/devoirTunisiePython | Python | 197 lines
   4  from bs4 import BeautifulSoup
   5  from flask import Flask, jsonify
  13  page = urllib2.urlopen(main)
  14  soup = BeautifulSoup(page, 'html.parser')
  15  soup.prettify()
  68  page_level = urllib2.urlopen(url_level)
  69  soup_level = BeautifulSoup(page_level, 'html.parser')
  70  soup_level.prettify()
 100  page_level = urllib2.urlopen(url_level)
 101  soup_level = BeautifulSoup(page_level, 'html.parser')
 102  soup_level.prettify()
 132  page_level = urllib2.urlopen(url_level)
 133  soup_level = BeautifulSoup(page_level, 'html.parser')
 134  soup_level.prettify()

selectors.rst https://github.com/noplay/scrapy.git | ReStructuredText | 380 lines
  11  * `BeautifulSoup`_ is a very popular screen scraping library among Python
  12  programmers which constructs a Python object based on the
  37  .. _BeautifulSoup: http://www.crummy.com/software/BeautifulSoup/
  38  .. _lxml: http://codespeak.net/lxml/
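The selectors.rst passage above is describing this object model: the soup is a tree you query by tag name and CSS class. A minimal sketch matching the lec_04_scraping.ipynb call earlier (bs.findAll('td', 'title')), in bs4 spelling:

    from bs4 import BeautifulSoup

    html = '<td class="title"><a href="/m/1">A Movie</a></td>'
    soup = BeautifulSoup(html, "html.parser")
    # The second positional argument matches the CSS class.
    for cell in soup.find_all("td", "title"):
        print(cell.a["href"], cell.get_text())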
editor.py https://gitlab.com/dannywillems/geeknote | Python | 259 lines
   5  import tempfile
   6  from bs4 import BeautifulSoup, NavigableString
   7  import threading
  57  # soup.select cant be used with dashes: https://bugs.launchpad.net/beautifulsoup/+bug/1276211
  58  for todo in soup.find_all('en-todo'):
  78  def ENMLtoText(contentENML):
  79  soup = BeautifulSoup(contentENML.decode('utf-8'))
 115  '''
 116  Transforms github style checklists `* [ ]` in the BeautifulSoup tree to
 117  enml.
 173  soup = BeautifulSoup(contentHTML, 'html.parser')
 174  Editor.checklistInSoupToENML(soup)

testFunctional.py https://github.com/eaudeweb/Naaya.git | Python | 151 lines
   1  import re
   2  from BeautifulSoup import BeautifulSoup
 141  html = self.browser.get_html()
 142  soup = BeautifulSoup(html)

build.py https://code.google.com/p/python-for-android/ | Python | 204 lines
 105  print 'Installing BeautifulSoup.'
 106  beautifulsoup_path = os.path.join(pwd, 'python-libs','BeautifulSoup')
 107  compileall.compile_dir(beautifulsoup_path)
 108  shutil.copy(os.path.join(beautifulsoup_path, 'BeautifulSoup.pyc'),
 109  'output/usr/lib/python2.6/BeautifulSoup.pyc')

README.rst https://github.com/liberation/django_compressor.git | ReStructuredText | 71 lines
  36  is done using lxml_ or if it's not available Python's built-in HTMLParser by
  37  default. As an alternative Django Compressor provides a BeautifulSoup_ and a
  38  html5lib_ based parser, as well as an abstract base class that makes it easy to
  59  .. _BeautifulSoup: http://www.crummy.com/software/BeautifulSoup/
  60  .. _lxml: http://lxml.de/

PROJECT_IDEAS.md https://gitlab.com/ini-python-lab-course/ss16 | Markdown | 126 lines
  49  * [import.io](https://import.io/): Service that extracts data from websites
  50  * [BeautifulSoup](http://www.crummy.com/software/BeautifulSoup/): Convenient access to content of a downloaded website
  51  * [Scrapy](http://scrapy.org/): Framework for scraping websites

utils.py https://github.com/smetsjp/erp5.git | Python | 236 lines
 211  def parse_declaration(self, i):
 212  """Fix handling of CDATA sections. Code borrowed from BeautifulSoup.
 213  """

single.py https://gitlab.com/skororu/pysnippets | Python | 67 lines
   9  import bs4 # BeautifulSoup
  10  import requests # codes.ok, get
  29  req = requests.get(url)
  30  page = bs4.BeautifulSoup(req.text, 'lxml')
  45  with requests.get(base_url) as req:
  46  page = bs4.BeautifulSoup(req.text, 'lxml')
  47  num_previous = page.find('a', rel='prev')['href']

independent.py https://gitlab.com/harrigan/TPP | Python | 36 lines
   5  from crimespider.items import CrimeItem
   6  from bs4 import BeautifulSoup
  30  article += c.extract()
  31  s = BeautifulSoup(article, 'lxml')
  32  print( s.get_text() )

flickr.py https://github.com/Br3nda/creepy.git | Python | 204 lines
  24  import re
  25  from BeautifulSoup import BeautifulSoup as bs
  50  '''
  51  Removing some javascript that choked BeautifulSoup's parser
  52  '''

geoserver.py https://github.com/nicopresto/webSkapes.git | Python | 104 lines
   2  from urllib2 import urlopen
   3  from BeautifulSoup import BeautifulStoneSoup

urlnorm.py https://github.com/e1ven/Lonava.git | Python | 256 lines
  28  import urllib2
  29  from BeautifulSoup import BeautifulSoup
  30  import socket
  69  try:
  70  soup = BeautifulSoup(html)
  71  links = soup.findAll('link')

urls.html https://github.com/msparks/pyhole.git | HTML | 155 lines
  65  <span class="kn">from</span> <span class="nn">BeautifulSoup</span> <span class="kn">import</span> <span class="n">BeautifulSoup</span>
 106  <span class="n">soup</span> <span class="o">=</span> <span class="n">BeautifulSoup</span><span class="p">(</span><span class="n">response</span><span class="o">.</span><span class="n">read</span><span class="p">())</span>

scrape-stock-index.py https://bitbucket.org/pombredanne/stock-index-scraper.git | Python | 238 lines
  22  from collections import namedtuple
  23  from BeautifulSoup import BeautifulSoup
 115  """
 116  Creates the BeautifulSoup instance from the given html, locates the main
 117  table, then processes each row as an individual stock entry.
 118  """
 119  soup = BeautifulSoup(html)
 120  table = find_main_table(soup)
 128  Attempts to find the stock table in the page html, returns None or
 129  a BeautifulSoup instance for the table.
 130  """
 134  """
 135  Takes a list of table rows (tr) as BeautifulSoup instances where one row
 136  contains the data for one stock entry. Uses helper functions to extract

version_check.py https://gitlab.com/mimizone/kolla | Python | 126 lines
  20  from bs4 import BeautifulSoup as bs
  21  from oslo_config import cfg

bugzilla_unittest.py https://github.com/weissms/owb-mirror.git | Python | 296 lines
  35  from modules.BeautifulSoup import BeautifulSoup
 204  bugzilla = Bugzilla()
 205  soup = BeautifulSoup(self._example_attachment)
 206  attachment_element = soup.find("attachment")

post.py https://github.com/langner/mmqc.git | Python | 342 lines
  22  import logging
  23  import BeautifulSoup
 147  """Retrieve excerpt from article"""
 148  s = BeautifulSoup.BeautifulSoup(self.content)
 149  # get rid of javascript, noscript and css

helper.py https://github.com/macdylan/LBForum.git | Python | 44 lines
   2  # -*- coding: UTF-8 -*-
   3  from BeautifulSoup import BeautifulSoup, NavigableString
   4  from django.conf import settings
  31  def clean_html( fragment ):
  32  soup = BeautifulSoup( fragment.strip() )
  33  def cleanup( soup ):

README.rst https://gitlab.com/gallaecio/chakraversiontracker | ReStructuredText | 245 lines
  19  - `beautifulsoup4 <https://www.crummy.com/software/BeautifulSoup/bs4/doc/>`_

readability.py https://gitlab.com/zouxc/cola | Python | 368 lines
  31  except ImportError:
  32  raise DependencyNotInstalledError("BeautifulSoup4")

ensembl_remote_rest.py https://github.com/kdaily/bcbb.git | Python | 226 lines
  18  from BeautifulSoup import BeautifulSoup
  19  from Bio import SeqIO
 110  organism, gene_id, tx_id) as in_handle:
 111  soup = BeautifulSoup(in_handle)
 112  stats_possibilities = soup.findAll("dl", "summary")
 128  gene_id, tx_id) as in_handle:
 129  soup = BeautifulSoup(in_handle)
 130  domain_table = soup.find("table", "ss autocenter")
 157  gene_id) as in_handle:
 158  soup = BeautifulSoup(in_handle)
 159  tx_info = soup.find("table", {"id" : "transcripts"})
 181  organism, gene_id) as in_handle:
 182  soup = BeautifulSoup(in_handle)
 183  orth_table = soup.find("table", "orthologues")

faq.rst https://gitlab.com/oytunistrator/scrapy | ReStructuredText | 286 lines
   6  How does Scrapy compare to BeautifulSoup or lxml?
   7  -------------------------------------------------
   9  `BeautifulSoup`_ and `lxml`_ are libraries for parsing HTML and XML. Scrapy is
  10  an application framework for writing web spiders that crawl web sites and
  13  Scrapy provides a built-in mechanism for extracting data (called
  14  :ref:`selectors <topics-selectors>`) but you can easily use `BeautifulSoup`_
  15  (or `lxml`_) instead, if you feel more comfortable working with them. After
  19  In other words, comparing `BeautifulSoup`_ (or `lxml`_) to Scrapy is like
  20  comparing `jinja2`_ to `Django`_.
  22  .. _BeautifulSoup: http://www.crummy.com/software/BeautifulSoup/
  23  .. _lxml: http://lxml.de/
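As the Scrapy FAQ above says, the two are complementary: Scrapy crawls, and nothing stops a spider callback from parsing with BeautifulSoup instead of Scrapy's selectors. A hedged sketch (the spider name and URL are placeholders):

    import scrapy
    from bs4 import BeautifulSoup

    class LinksSpider(scrapy.Spider):
        name = "links"                        # hypothetical
        start_urls = ["http://example.com/"]  # hypothetical

        def parse(self, response):
            # Scrapy delivers the raw body; parse it with bs4 if preferred.
            soup = BeautifulSoup(response.text, "html.parser")
            for a in soup.find_all("a", href=True):
                yield {"link": a["href"]}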
dork.py https://bitbucket.org/badc0re/xsser_gsoc.git | Python | 273 lines
  26  urllib2.socket.setdefaulttimeout(5.0)
  27  from BeautifulSoup import BeautifulSoup
 230  try:
 231  soup = BeautifulSoup(html_data, fromEncoding=encoding)
 232  except Exception, e:

ba.py https://github.com/barttenbrinke/Bartsidee-Repository.git | Python | 328 lines
  13  import cPickle as pickle
  14  from beautifulsoup.BeautifulSoup import BeautifulSoup
 125  data = FetchUrl(samiurl, 0)
 126  soup = BeautifulSoup(data, convertEntities="xml", smartQuotesTo="xml")
 127  i = 1
 164  data = FetchUrl(path)
 165  soup = BeautifulSoup(data, convertEntities="xml", smartQuotesTo="xml")
 166  i = 1

ultimate.py https://gitlab.com/eientei95/crunchy-xml-decoder | Python | 351 lines
  18  import altfuncs
  19  from bs4 import BeautifulSoup
  20  from crunchyDec import CrunchyDec
 231  media_id = page_url[-6:]
 232  xmlconfig = BeautifulSoup(altfuncs.getxml('RpcApiVideoPlayer_GetStandardConfig', media_id), 'xml')
 249  media_id = xmlconfig.find('media_id').string
 250  xmlconfig = BeautifulSoup(altfuncs.getxml('RpcApiVideoEncode_GetStreamInfo', media_id), 'xml')
 251  host = xmlconfig.find('host').string

adapter_twcslibrarynet.py https://code.google.com/p/fanficdownloader/ | Python | 273 lines
  26  from .. import BeautifulSoup as bs
  27  from ..htmlcleanup import stripHTML
 137  # use BeautifulSoup HTML parser to make everything easier to find.
 138  soup = bs.BeautifulSoup(data)
 228  # use BeautifulSoup HTML parser to make everything easier to find.
 229  seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url))
 253  chapter=bs.BeautifulSoup('<div class="story"></div>')
 255  soup = bs.BeautifulSoup(data)

models.py https://github.com/agiliq/Dinette.git | Python | 375 lines
  10  import hashlib
  11  from BeautifulSoup import BeautifulSoup
  12  import datetime
 260  def htmlfrombbcode(self):
 261  soup = BeautifulSoup(self.message.raw)
 262  #remove all html tags from the message

yes24_script.py https://github.com/jangxyz/yes24.git | Python | 193 lines
   4  import urllib, urllib2, cookielib
   5  from BeautifulSoup import BeautifulSoup
   6  from datetime import datetime
  77  # parse
  78  soup = BeautifulSoup(text)
  79  order_list_table = soup.table(id="MyOrderListTbl")[0]
 125  # parse
 126  soup = BeautifulSoup(text)
 127  order_price = soup.find(id="CLbTotOrdAmt").b.string
 129  text = '<table>' + text[text[1:].find('<')+1:-7] + '</table>'
 130  soup = BeautifulSoup(text)
 131  point_saved = soup.find(attrs={'class':"price"}).b.string

readme.md https://github.com/atomia/atomia-nagios-plugins.git | Markdown | 343 lines
  30  * **WWW::Mechanize** (on ubuntu, just `apt-get install libwww-mechanize-perl`)
  31  * **BeautifulSoup4** (ubuntu: `apt-get install python-bs4 python3-bs4`)

zad_7.py https://gitlab.com/mmeisel/LV | Python | 43 lines
   8  import urllib
   9  from bs4 import BeautifulSoup
  26  html=urllib.urlopen(urlAddr, "lxml").read()  # open the url
  27  soup=BeautifulSoup(html)  # and declare a BeautifulSoup object

scraping-the-web.rst https://github.com/toastdriven/pydanny-event-notes.git | ReStructuredText | 109 lines
  45  * BeautifulSoup is old and not maintained anymore
  46  * html5lib
  47  - builds BeautifulSoup objects
  48  - builds elementTrees

_lxml.py https://bitbucket.org/bendikro/deluge-yarss-plugin.git | Python | 296 lines
  60  def initialize_soup(self, soup):
  61  """Let the BeautifulSoup object know about the standard namespace
  62  mapping.
  67  def _register_namespaces(self, mapping):
  68  """Let the BeautifulSoup object know about namespaces encountered
  69  while parsing the document.
  74  if key and key not in self.soup._namespaces:
  75  # Let the BeautifulSoup object know about a new namespace.
  76  # If there are multiple namespaces defined with the same
 190  # First, Let the BeautifulSoup object know about it.
 191  self._register_namespaces(nsmap)

__init__.py https://bitbucket.org/yourcelf/old-intertwinkles.git | Python | 163 lines
  19  from django_browserid import get_audience
  20  from bs4 import BeautifulSoup
  93  self.assertTrue("test@example.com" in res.content)
  94  soup = BeautifulSoup(res.content)
  95  self.assertEquals(soup.find(id="id_email").get("value"),
 104  self.assertFalse("This address is unconfirmed" in res.content)
 105  soup = BeautifulSoup(res.content)
 106  self.assertEquals(soup.find(id="id_email").get("value"),

nrlbot.py https://gitlab.com/g.davis13/nrlbot | Python | 344 lines
  36  from bs4 import BeautifulSoup
  37  from collections import namedtuple
 100  r = requests.get(url)
 101  soup = BeautifulSoup(r.text, 'html.parser')
 102  return soup

index.html https://github.com/larsks/blog.oddbit.com.git | HTML | 240 lines
   6  <title>Recent answers on StackOverflow · The Odd Bit</title>
   7  <meta name="description" content="Traefik different ports for different Docker containers docker docker-compose traefik git push can not find -o option git Interact with podman docker via socket in Redhat 9 docker redhat podman Capturing commented text in an XML python xml beautifulsoup xml-comments How to execute a shell script as input on an interactive bash pod in Kubernetes? bash shell kubernetes Docker : Opensearch refuses connection with the example in opensearch documentation in docker python-3.">
 142  <li><p><a class="sx-answer sx-answer-accepted" href="https://stackoverflow.com/questions/72681436/capturing-commented-text-in-an-xml/72681822#72681822">Capturing commented text in an XML</a>
 143  <a class="sx-tag" href="https://stackoverflow.com/questions/tagged/python">python</a> <a class="sx-tag" href="https://stackoverflow.com/questions/tagged/xml">xml</a> <a class="sx-tag" href="https://stackoverflow.com/questions/tagged/beautifulsoup">beautifulsoup</a> <a class="sx-tag" href="https://stackoverflow.com/questions/tagged/xml-comments">xml-comments</a> </p></li>

conversation.py https://gitlab.com/sanchezfauste/TweetDigraph | Python | 108 lines
   1  from bs4 import BeautifulSoup
   2  import requests
  95  if req.status_code == 200:
  96  html = BeautifulSoup(req.text, 'html.parser')
  97  conversations = html.find_all('li', {'class':'ThreadedConversation'})

base_fetcher.py https://bitbucket.org/filmaster/filmaster-stable/ | Python | 193 lines
   4  from beautifulsoup import BeautifulSoup
   5  from optparse import make_option
  20  BeautifulSoup.MARKUP_MASSAGE += [
  21  (re.compile(r"<[^>]+>"), lambda tag:quote_re.sub(r"\1 \2", tag.group(0))),
  65  def soup(self, data):
  66  return BeautifulSoup(data)

scrape.py https://github.com/sneeu/aliss_scrapers.git | Python | 113 lines
   8  from BeautifulSoup import BeautifulSoup
  32  html = re.sub('<script.*?>[\s\S]*?</.*?script>', '', html)
  33  soup = BeautifulSoup(html)

podnapisi.py https://github.com/abenea/subliminal.git | Python | 150 lines
  87  :return: the response
  88  :rtype: :class:`xml.etree.ElementTree.Element` or :class:`bs4.BeautifulSoup`
  96  else:
  97  return bs4.BeautifulSoup(r.content, ['permissive'])

bills.py https://github.com/runderwood/openstates.git | Python | 138 lines
   5  import urllib2
   6  from BeautifulSoup import BeautifulSoup
  13  It includes a spurious </HEAD> before the useful data begins and lines like '<option value="Bill"selected="selected">Bill</option>', in which the lack of a space between the attributes confuses BeautifulSoup.
  14  '''
  69  return False
  70  soup = BeautifulSoup(cleansource(data))
  71  rows = soup.findAll('table')[1].findAll('tr')[1:]
  91  with self.urlopen(histurl) as data:
  92  soup = BeautifulSoup(cleansource(data))
  93  basicinfo = soup.findAll('div', id='bhistleft')[0]

__init__.py https://github.com/junalmeida/Sick-Beard.git | Python | 96 lines
  40  treeType - the name of the tree type required (case-insensitive). Supported
  41  values are "simpletree", "dom", "etree" and "beautifulsoup"
  51  ElementTree, cElementTree and lxml.etree).
  52  "beautifulsoup" - Beautiful soup (if installed)
  70  treeBuilderCache[treeType] = simpletree.TreeBuilder
  71  elif treeType == "beautifulsoup":
  72  import soup
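The "beautifulsoup" tree type above is the old html5lib-side integration; later html5lib releases removed it, and the direction is now reversed: bs4 drives html5lib when you name it as the parser. A minimal sketch (requires the html5lib package):

    from bs4 import BeautifulSoup

    # html5lib builds the tree the way a browser would, so the
    # unclosed tag below is repaired instead of rejected.
    soup = BeautifulSoup("<p>unclosed paragraph", "html5lib")
    print(soup.p.get_text())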
metacritic.py https://bitbucket.org/alex_fish/vgr.git | Python | 273 lines
   5  from datetime import datetime
   6  from BeautifulSoup import BeautifulSoup
   7  from pprint import pprint
  70  return None
  71  soup = BeautifulSoup(html)
  72  i = 0
 124  return None
 125  soup = BeautifulSoup(html)
 126  prod = MetacriticInfo()

hyperleech.py https://bitbucket.org/devinjames/hyperleech.git | Python | 407 lines
  24  import platform
  25  from bs4 import BeautifulSoup
  26  import requests
 279  loghandle = open(self.logfile, "r")
 280  self.soup = BeautifulSoup(loghandle.read()) # , 'xml') # this contains the original soup, never changes.
 281  self.username = self.soup.find('user')
 286  print "No log file, creating new"
 287  self.xml = BeautifulSoup('<hyperleech>') # , 'xml')
 288  self.soup = self.xml
 307  def write(self):
 308  if type(self.loghandle) is not file or type(self.soup) is not BeautifulSoup:
 309  print "Something with the log handle isn't initialized"

lequipe_fr.py https://gitlab.com/edelans/scoragora | Python | 180 lines
  55  html = requests.get(url).text
  56  soup = BeautifulSoup.BeautifulSoup(html)
  57  # Get date
  85  html = requests.get(day_url).text
  86  soup = BeautifulSoup.BeautifulSoup(html)
  87  result = []
 119  html = requests.get(url).text
 120  soup = BeautifulSoup.BeautifulSoup(html)
 121  root = soup.find(attrs={'name': 'IDNIVEAU'})
 147  html = requests.get(url).text
 148  soup = BeautifulSoup.BeautifulSoup(html)
 149  soup.find(id="timeline")
 169  html = requests.get(url).text
 170  soup = BeautifulSoup.BeautifulSoup(html)
 171  hometeam_score = soup.find(id='scoDom')

tpb.py https://github.com/SpLord/CouchPotato.git | Python | 160 lines
   4  from dateutil.parser import parse
   5  from imdb.parser.http.bsouplxml._bsoup import SoupStrainer, BeautifulSoup
   6  from urllib import quote_plus
  63  tables = SoupStrainer('table')
  64  html = BeautifulSoup(data, parseOnlyThese = tables)
  65  resultTable = html.find('table', attrs = {'id':'searchResult'})
 154  div = SoupStrainer('div')
 155  html = BeautifulSoup(data, parseOnlyThese = div)
 156  html = html.find('div', attrs = {'class':'nfo'})
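tpb.py above uses BS3's parseOnlyThese to build only the tables of a large page. The bs4 spelling of the same idiom is parse_only; a minimal sketch:

    from bs4 import BeautifulSoup, SoupStrainer

    html = '<div>noise</div><table id="searchResult"><tr><td>hit</td></tr></table>'
    # Only <table> subtrees are built; the rest of the page is skipped.
    soup = BeautifulSoup(html, "html.parser", parse_only=SoupStrainer("table"))
    print(soup.find("table", attrs={"id": "searchResult"}) is not None)  # True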
searchengine.py https://github.com/kzfm1024/misc.git | Python | 306 lines
   1  import urllib2
   2  from BeautifulSoup import *
   3  from urlparse import urljoin
 106  try:
 107  soup=BeautifulSoup(c.read())
 108  self.addtoindex(page,soup)

vt_hash2filenames.py https://bitbucket.org/Vnoxygen/malformity.git | Python | 43 lines
   3  import re
   4  from BeautifulSoup import BeautifulSoup
   5  from canari.maltego.utils import debug, progress

shorter.py https://bitbucket.org/badc0re/xsser_gsoc.git | Python | 76 lines
  27  from cStringIO import StringIO
  28  from BeautifulSoup import BeautifulSoup
  66  soup = BeautifulSoup(out.getvalue())
  67  if self._service == 'tinyurl':

KitMensaService.py https://gitlab.com/namboy94/messengerbot | Python | 230 lines
  26  import requests
  27  from bs4 import BeautifulSoup
  28  from typing import Tuple
 162  html = requests.get(url).text
 163  soup = BeautifulSoup(html, "html.parser")
 164  resource = soup.select('body')

2010-11-21-exploring_art_data_3.md https://gitlab.com/rheaplex/robmyers.org | Markdown | 172 lines
  23  <tt>#!/usr/bin/python
  24  from BeautifulSoup import BeautifulStoneSoup
  25  import re

catalogparser.py https://github.com/jeffh/YACS.git | Python | 106 lines
   2  import re
   3  from BeautifulSoup import BeautifulSoup
   4  from rpi_courses.config import DEPARTMENTS
  40  course_page = re.sub('<br */?>', '\n', course_page)
  41  soup = BeautifulSoup(course_page, convertEntities=BeautifulSoup.HTML_ENTITIES)
  42  title_text = soup.findAll('h1 h2 h3 h4 h5 h6'.split(' '))[0].text

test_markdown_to_html.py https://gitlab.com/Ivy001/pants | Python | 186 lines
 174  soup = bs4.BeautifulSoup(markup=html)
 175  self.assertIsNotNone(soup.find(text='A good link:'))

utils.py https://github.com/Gautier/django-page-cms.git | Python | 139 lines
 119  return content
 120  from BeautifulSoup import BeautifulSoup
 121  tree = BeautifulSoup(content)

base.py https://github.com/2dpodcast/jaikuenginepatch.git | Python | 209 lines
  18  from beautifulsoup import BeautifulSoup
 193  self.assertWellformed(response)
 194  parsed = BeautifulSoup.BeautifulSoup(response.content)
 195  found = parsed.findAll('a', attrs = { 'class': link_class})

whit.py https://bitbucket.org/chef1991/whit.git | Python | 361 lines
   1  from bs4 import BeautifulSoup # HTML handling
   2  from flask import Flask, request, redirect # Routing
  80  # Ensure the overview is well formed
  81  overview = BeautifulSoup(entryText)
 264  p3 = p2['parse']['text']['*']
 265  p4 = BeautifulSoup(p3)
 266  p5 = p4.find_all('p')

release.py https://gitlab.com/LocutusOfPenguin/python-chess | Python | 178 lines
 141  sys.exit(1)
 142  soup = bs4.BeautifulSoup(res.text, "html.parser")
 143  csrf = soup.find("input", {"name": "CSRFToken"})["value"]

util.py https://gitlab.com/Lett1/SlackDuckBot | Python | 151 lines
   5  import re
   6  from bs4 import BeautifulSoup
   7  from urllib.request import Request, urlopen
 113  else:
 114  soup = BeautifulSoup(html, "lxml")
 115  if soup.title is not None:

importer.py https://github.com/gregmalcolm/Bookie.git | Python | 201 lines
   2  from datetime import datetime
   3  from BeautifulSoup import BeautifulSoup
   4  from bookie.models import BmarkMgr
  79  soup = BeautifulSoup(file_io)
  80  can_handle = False
  90  """Given a file, process it"""
  91  soup = BeautifulSoup(self.file_handle)
 140  """
 141  soup = BeautifulSoup(file_io)
 142  can_handle = False
 157  """
 158  soup = BeautifulSoup(self.file_handle)
 159  if not soup.contents[0] == "DOCTYPE NETSCAPE-Bookmark-file-1":

types.py https://github.com/rxuriguera/bibtexIndexMaker.git | Python | 87 lines
  37  self.msg attribute contains explanation why parsing failed
  38  self.tag attribute contains BeautifulSoup object with the most relevant tag
  39  that failed to parse

setup.py https://github.com/eged/django-blog-zinnia.git | Python | 37 lines
  33  'akismet',
  34  'BeautifulSoup',
  35  ])

inject.py https://gitlab.com/BoTranVan/MITMf | Python | 195 lines
  24  from bs4 import BeautifulSoup
  25  from plugins.plugin import Plugin
  86  if encoding is not None:
  87  html = BeautifulSoup(data.decode(encoding, "ignore"), "lxml")
  88  else:
  89  html = BeautifulSoup(data, "lxml")
  98  if self.html_payload:
  99  payload = BeautifulSoup(self.html_payload, "html.parser")
 100  html.body.append(payload)
 104  with open(self.html_file, 'r') as file:
 105  payload = BeautifulSoup(file.read(), "html.parser")
 106  html.body.append(payload)

tvsubtitles.py https://github.com/junalmeida/Sick-Beard.git | Python | 191 lines
  98  :return: the response
  99  :rtype: :class:`bs4.BeautifulSoup`
 104  raise ProviderError('Request failed with status code %d' % r.status_code)
 105  return bs4.BeautifulSoup(r.content, ['permissive'])

test_microformats.py https://bitbucket.org/inirudebwoy/gdziebylkaziu.git | Python | 155 lines
   5  try:
   6  from BeautifulSoup import BeautifulSoup
   7  except ImportError:
   8  BeautifulSoup = None
  25  def test_one_soup(self):
  26  if BeautifulSoup:
  27  locations = self.parser.find_all(BeautifulSoup(self.MARKUP))
  31  def test_multi_soup(self):
  32  if BeautifulSoup:
  33  locations = self.parser.find_all(BeautifulSoup(self.MARKUP * 3))
  50  def test_none_soup(self):
  51  if BeautifulSoup:
  52  locations = self.parser.find_all(BeautifulSoup(self.MARKUP))

sponsoredlinks.py https://bitbucket.org/manaphassan/raspberry-pwn.git | Python | 235 lines
  14  from htmlentitydefs import name2codepoint
  15  from BeautifulSoup import BeautifulSoup
  30  self.msg attribute contains explanation why parsing failed
  31  self.tag attribute contains BeautifulSoup object with the most relevant tag that failed to parse
  32  Thrown only in debug mode
 159  return BeautifulSoup(page)

share_post.py https://gitlab.com/janninematt/janninematt | Python | 81 lines
   9  from bs4 import BeautifulSoup
  10  try:
  18  def article_title(content):
  19  main_title = BeautifulSoup(content.title, 'html.parser').get_text().strip()
  20  sub_title = ''
  21  if hasattr(content, 'subtitle'):
  22  sub_title = ' ' + BeautifulSoup(content.subtitle, 'html.parser').get_text().strip()
  23  return quote(('%s%s' % (main_title, sub_title)).encode('utf-8'))
  31  def article_summary(content):
  32  return quote(BeautifulSoup(content.summary, 'html.parser').get_text().strip().encode('utf-8'))

models.py https://github.com/mci/mpatlas.git | Python | 250 lines
  13  from ckeditor.fields import RichTextField
  14  from bs4 import BeautifulSoup
  15  from uuslug import uuslug, slugify

cablemodem_check.py https://gitlab.com/mikeos2/Nagios_Plugins | Python | 223 lines
  38  try:
  39  from bs4 import BeautifulSoup
  40  except ImportError:
  41  print "Error: (" + str(Nagios_UNKNOWN) + ") install BeautifulSoup!"
  42  sys.exit(Nagios_UNKNOWN)
  70  return BeautifulSoup(page)
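The Nagios check above guards its import so a missing library reports UNKNOWN instead of crashing. The same pattern in Python 3 form (exit code 3 is Nagios UNKNOWN):

    import sys

    try:
        from bs4 import BeautifulSoup  # PyPI package name: beautifulsoup4
    except ImportError:
        print("Error: (3) install BeautifulSoup4, e.g. pip install beautifulsoup4")
        sys.exit(3)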
get_legislation.py https://github.com/gosuri/fiftystates.git | Python | 117 lines
  14  state = 'nc'
  15  soup_parser = html5lib.HTMLParser(tree=html5lib.treebuilders.getTreeBuilder('beautifulsoup')).parse

make_chart.py https://github.com/egor83/hn-stuff.git | Python | 181 lines
   1  import BeautifulSoup
   2  import logging
 112  soup = BeautifulSoup.BeautifulSoup(page)

__init__.py https://github.com/junalmeida/Sick-Beard.git | Python | 307 lines
  76  # The BeautifulSoup class will take feature lists from developers and use them
  77  # to look up builders in this registry.

Weather.py https://gitlab.com/leiftomas/jasper-client | Python | 172 lines
  43  'international_cities.asp')
  44  soup = bs4.BeautifulSoup(r.text)
  45  data = soup.find(id="inner-content").find('pre').string

subtitle-downloader.py https://gitlab.com/132nd-etcher/subtitle-downloader | Python | 124 lines
  18  import requests,time,re,zipfile
  19  from bs4 import BeautifulSoup
  20  PY_VERSION = sys.version_info[0]
  74  r=requests.get("http://subscene.com/subtitles/release?q="+root);
  75  soup=BeautifulSoup(r.content,"lxml")
  76  atags=soup.find_all("a")
  83  r=requests.get("http://subscene.com"+href);
  84  soup=BeautifulSoup(r.content,"lxml")
  85  lin=soup.find_all('a',attrs={'id':'downloadButton'})[0].get("href")
  86  r=requests.get("http://subscene.com"+lin);
  87  soup=BeautifulSoup(r.content,"lxml")
  88  subfile=open(root2+".zip", 'wb')

baseparser.py https://gitlab.com/andyblaesus/newsdiffs | Python | 156 lines
  52  # Ick.
  53  from BeautifulSoup import BeautifulSoup
  54  def bs_fixed_getText(self, separator=u""):
  55  bsmod = sys.modules[BeautifulSoup.__module__]
  56  if not len(self.contents):
  65  return separator.join(strings)
  66  sys.modules[BeautifulSoup.__module__].Tag.getText = bs_fixed_getText
  67  # End fix
 112  feeder_bs = BeautifulSoup #use this version of beautifulsoup for feed

fileops.py https://gitlab.com/j000sh/hackerrank-to-git | Python | 136 lines
   6  from pprint import pprint
   7  from bs4 import BeautifulSoup
   8  from sh import git
  87  with open(filename, 'w') as f:
  88  f.write(BeautifulSoup(html, 'html5lib').prettify() + '\n')
  89  gitCommitModel(contest['model'], filename, 'contest created: ' + model['slug'])
 105  with open(filename, 'w') as f:
 106  f.write(BeautifulSoup(html, "html5lib").prettify() + "\n")
 107  gitCommitModel(challenge, filename, 'challenge created: ' + challenge['slug'])

plugin.py https://github.com/lbjay/supybot-plugins.git | Python | 47 lines
  10  from BeautifulSoup import BeautifulSoup
  24  soup = BeautifulSoup(doc)
  25  dd = soup.find('dd', 'highlight')

wikipediaidevice.py https://github.com/RoDaniel/featurehouse.git | Python | 211 lines
   4  import re
   5  from exe.engine.beautifulsoup import BeautifulSoup
   6  from exe.engine.idevice import Idevice
  73  page = page.replace(u' ', u' ')
  74  soup = BeautifulSoup(page, False)
  75  content = soup.first('div', {'id': "content"})

sitegen.py https://gitlab.com/Ivy001/pants | Python | 374 lines
  36  import bs4
  37  return bs4.BeautifulSoup(*args, **kwargs)
  70  def load_soups(config):
  71  """Generate BeautifulSoup AST for each page listed in config."""
  72  soups = {}

ONPEcrawler.py https://github.com/PuercoPop/EleccionesPeru.git | Python | 169 lines
   4  from urllib2 import Request, urlopen
   5  from BeautifulSoup import BeautifulSoup
   6  import Elecciones.models as m
  54  f = urlopen( req )
  55  soup = BeautifulSoup( f.read(),
  56  convertEntities=BeautifulSoup.HTML_ENTITIES)
  68  f = urlopen( req )
  69  soup = BeautifulSoup( f.read(),
  70  convertEntities=BeautifulSoup.HTML_ENTITIES)
  79  f=urlopen(req)
  80  soup = BeautifulSoup( f.read(),
  81  convertEntities=BeautifulSoup.HTML_ENTITIES)
 105  f = urlopen( req )
 106  soup = soup.BeautifulSoup( f.read(),
 107  convertEntities=BeautifulSoup.HTML_ENTITIES)

download_russian_contrast.py https://bitbucket.org/Meister17/term-extraction.git | Python | 108 lines
   2  # -*- coding: utf-8 -*-
   3  from BeautifulSoup import BeautifulSoup
   4  import optparse
  38  html = response.read()
  39  soup = BeautifulSoup(html)
  40  zero_result = False;

__init__.py https://github.com/oesmith/django-css.git | Python | 316 lines
   3  import subprocess
   4  from BeautifulSoup import BeautifulSoup
   5  from tempfile import NamedTemporaryFile
  54  self.split_content = []
  55  self.soup = BeautifulSoup(self.content)
  56  self.xhtml = xhtml
 268  basename = os.path.splitext(os.path.basename(filename))[0]
 269  elem = BeautifulSoup(re.sub(basename+ext,basename+'.css',unicode(elem)))
 270  filename = path + '.css'

test_pipreqs.py https://gitlab.com/Kravcenko/pipreqs | Python | 188 lines
  23  'peewee', 'ujson', 'nonexistendmodule', 'bs4', 'after_method_is_valid_even_if_not_pep8' ]
  24  self.modules2 = ['beautifulsoup4']
  25  self.local = ["docopt", "requests", "nose", 'pyflakes']

phew.py https://gitlab.com/fnaticshank/crawler | Python | 327 lines
  13  import requests
  14  from bs4 import BeautifulSoup
 213  errors="replace")
 214  soup = BeautifulSoup(content, "lxml")
 215  tags = soup('a')

layouttestresults.py https://gitlab.com/x33n/phantomjs | Python | 91 lines
  31  from webkitpy.common.net.resultsjsonparser import ResultsJSONParser
  32  from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup, SoupStrainer
  33  from webkitpy.layout_tests.models import test_results

clozeidevice.py https://github.com/RoDaniel/featurehouse.git | Python | 240 lines
 150  """
 151  takes a BeautifulSoup fragment (i) and bursts its contents to
 152  import this idevice from a CommonCartridge export