100+ results for 'beautifulsoup' (161 ms)
  11: from PIL import ImageDraw
  12: from bs4 import BeautifulSoup
  13:
  93: raw_html = self.browser.response().read()
  94: soup = BeautifulSoup(raw_html)
  95:
  149: mechanize object.'''
  150: soup = BeautifulSoup(self.response)
  151: formcount=0
  165: mechanize object.'''
  166: soup = BeautifulSoup(self.response)
  167: formcount=0
  180: ''' Displays all the links from the current browser page.'''
  181: soup = BeautifulSoup(raw_html)
  182: categories_table_obj = soup.find("table", attrs={"id":'my_table'})

google_soup.py https://github.com/chudler/Community-Zenpacks.git | Python | 221 lines
  12: import logging.handlers
  13: from BeautifulSoup import BeautifulSoup
  14: from ClientForm import *
  83: def findApplications(self):
  84: main_apps = BeautifulSoup(self.resetClient())
  85: column_headers = []
  87: for table in main_apps.findAll('table', limit=1):
  88: # table is a BeautifulSoup.Tag object
  89: column_headers = self.extract_headings(table)
  148: quota_details = re.sub(quota_fix, '', quota_details)
  149: quota_soup = BeautifulSoup(quota_details)
  150: quota_section = quota_soup.find(attrs={'id':'ae-quota-details'})
  168: app_main = open('/tmp/dashboard.html', 'r').read()
  169: app_soup = BeautifulSoup(app_main)
  170: load_section = app_soup.find(text=re.compile('Current Load'))

testFunctional.py https://github.com/bogtan/Naaya.git | Python | 212 lines
  21: from unittest import TestSuite, makeSuite
  22: from BeautifulSoup import BeautifulSoup
  23:
  147: html = self.browser.get_html()
  148: soup = BeautifulSoup(html)
  149:

_htmlparser.py https://github.com/yoheia/yoheia.git | Python | 265 lines
  53:
  54: class BeautifulSoupHTMLParser(HTMLParser):
  55: def handle_starttag(self, name, attrs):
  163: args, kwargs = self.parser_args
  164: parser = BeautifulSoupHTMLParser(*args, **kwargs)
  165: parser.soup = self.soup
  169: warnings.warn(RuntimeWarning(
  170: "Python's built-in HTMLParser cannot parse the given document. This is not a bug in Beautiful Soup. The best solution is to install an external parser (lxml or html5lib), and use Beautiful Soup with that parser. See http://www.crummy.com/software/BeautifulSoup/bs4/doc/#installing-a-parser for help."))
  171: raise e
  198: """, re.VERBOSE)
  199: BeautifulSoupHTMLParser.locatestarttagend = locatestarttagend
  261:
  262: BeautifulSoupHTMLParser.parse_starttag = parse_starttag
  263: BeautifulSoupHTMLParser.set_cdata_mode = set_cdata_mode

TV3Scrapper.py http://xbmc-vodie.googlecode.com/svn/trunk/ | Python | 254 lines
  9: import sys
  10: from BeautifulSoup import SoupStrainer, MinimalSoup as BeautifulSoup, BeautifulStoneSoup
  11: import urllib, urllib2

TVSeriesUtil.py http://xbmc-vodie.googlecode.com/svn/trunk/ | Python | 224 lines
  9: import sys
  10: from BeautifulSoup import SoupStrainer, MinimalSoup as BeautifulSoup, BeautifulStoneSoup
  11: import urllib, urllib2, cookielib

main.py https://github.com/gtracy/APODEmail.git | Python | 249 lines
  18:
  19: from BeautifulSoup import BeautifulSoup, Tag
  20:
  184:
  185: soup = BeautifulSoup(result.content)
  186: #logging.debug(soup)

hackerrankops.py https://gitlab.com/j000sh/hackerrank-to-git | Python | 200 lines
  9: import logging # TODO get rid of these print statements!
  10: from bs4 import BeautifulSoup
  11:
  173: return
  174: csrfHtml = BeautifulSoup(r.text, 'html.parser').find(id = 'csrf-token')
  175: if csrfHtml:

diagnose.py https://gitlab.com/Rheinhart/csuchen-Guard | Python | 216 lines
  8: import bs4
  9: from bs4 import BeautifulSoup, __version__
  10: from bs4.builder import builder_registry
  69: try:
  70: soup = BeautifulSoup(data, parser)
  71: success = True
  178: a = time.time()
  179: soup = BeautifulSoup(data, parser)
  180: b = time.time()
  207: vars = dict(bs4=bs4, data=data, parser=parser)
  208: cProfile.runctx('bs4.BeautifulSoup(data, parser)', vars, vars, filename)
  209:

quiztestidevice.py https://github.com/RoDaniel/featurehouse.git | Python | 306 lines
  217: """
  218: takes a BeautifulSoup fragment (i) and bursts its contents to
  219: import this idevice from a CommonCartridge export

default.py http://seppius-xbmc-repo.googlecode.com/svn/trunk/ | Python | 406 lines
  32:
  33: from BeautifulSoup import BeautifulSoup, BeautifulStoneSoup
  34: import socket
  108:
  109: beautifulSoup = BeautifulSoup(http)
  110: userPanel = beautifulSoup.find('a', {"id": "loginlink"})
  171:
  172: beautifulSoup = BeautifulSoup(http)
  173: content = beautifulSoup.find('div', attrs={'id': 'dle-content'})
  243:
  244: beautifulSoup = BeautifulSoup(http)
  245: categoryContainer = beautifulSoup.find('ul', 'cats')
  273:
  274: beautifulSoup = BeautifulSoup(http)
  275: tagsContainer = beautifulSoup.find('td', 'news')

jobs.py git://pkgs.fedoraproject.org/sugar-read | Python | 310 lines
  26: import os.path
  27: import BeautifulSoup
  28:
  71: def _searchfile(self, fileobj):
  72: soup = BeautifulSoup.BeautifulSoup(fileobj)
  73: body = soup.find('body')

test_converter_unittest.py https://gitlab.com/x33n/phantomjs | Python | 319 lines
  34: from webkitpy.common.system.outputcapture import OutputCapture
  35: from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup
  36: from webkitpy.w3c.test_converter import W3CTestConverter
  185:
  186: doc = BeautifulSoup(test_html)
  187: oc = OutputCapture()
  266: try:
  267: converted = converter.convert_prefixed_properties(BeautifulSoup(test_content[1]), DUMMY_FILENAME)
  268: finally:
  281: if isinstance(converted, basestring):
  282: converted = BeautifulSoup(converted)
  283:

mastodon.scm https://gitlab.com/daym/guix | Scheme | 174 lines
  60: (inputs
  61: `(("python-beautifulsoup4" ,python-beautifulsoup4)
  62: ("python-requests" ,python-requests)

browser.py https://bitbucket.org/synl0rd/upt_tik_itenas.git | Python | 236 lines
  87: import BeautifulSoup
  88: return BeautifulSoup.BeautifulSoup(self.data)
  89:

util.py https://github.com/sunlightlabs/muni_words.git | Python | 198 lines
  8: from django.contrib.gis.geos import Point
  9: from BeautifulSoup import BeautifulSoup, BeautifulStoneSoup
  10: from excludes import EXCLUDED
  40: def strip_html(string):
  41: return ''.join([e for e in BeautifulSoup(string).recursiveChildGenerator() if isinstance(e, unicode)]).replace(' ', ' ')
  42:

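The strip_html one-liner in the util.py hit above is BeautifulSoup 3 era. On bs4 the same utility usually collapses to get_text(); a hedged modern equivalent (not the project's code; the nbsp handling is assumed from the original):

    from bs4 import BeautifulSoup

    def strip_html(string):
        # get_text() walks the same child nodes the recursiveChildGenerator()
        # expression iterated; the replace keeps the nbsp-to-space behaviour
        return BeautifulSoup(string, "html.parser").get_text().replace("\xa0", " ")
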
23 "from pattern import web\n", 24 "from BeautifulSoup import BeautifulSoup" 25 ], 261 "source": [ 262 "# Using BeautifulSoup" 263 ] 268 "input": [ 269 "bs = BeautifulSoup(r.text)\n", 270 "for movie in bs.findAll('td', 'title'):\n",get_legislation.py https://github.com/chrismetcalf/fiftystates.git | Python | 112 lines
4import datetime as dt 5from BeautifulSoup import BeautifulSoup 6 18 19 # Get the details page and parse it with BeautifulSoup. These 20 # pages contain a malformed 'p' tag that (certain versions of) 23 details_raw = details_raw.replace('<P ALIGN=CENTER">', '') 24 details = BeautifulSoup(details_raw) 25 26 # Get the history page (following a link from the details page). 27 # Once again, we remove tags that BeautifulSoup chokes on 28 # (including all meta tags, because bills with quotation marks 35 history_raw = rem_meta.sub('</title></head>', history_raw) 36 history = BeautifulSoup(history_raw) 37examples.py https://github.com/towerjoo/django-test-extensions.git | Python | 112 lines
103 def test_using_beautiful_soup(self): 104 "Example test for content on a given view, this time using the BeautifulSoup parser" 105 response = self.client.get('/example/') 105 response = self.client.get('/example/') 106 soup = BeautifulSoup(response.content) 107 self.assert_equal("Page Title", soup.find("title").string.strip())lxml.html.ElementSoup-module.html https://github.com/jcrobak/hue.git | HTML | 278 lines
64<h1 class="epydoc">Module ElementSoup</h1><p class="nomargin-top"><span class="codelink"><a href="lxml.html.ElementSoup-pysrc.html">source code</a></span></p> 65Legacy interface to the BeautifulSoup HTML parser. 66 91 <span class="summary-sig-arg">makeelement</span>=<span class="summary-sig-default">None</span>)</span><br /> 92 Convert a BeautifulSoup tree to a list of Element trees.</td> 93 <td align="right" valign="top"> 108 <td><span class="summary-sig"><a name="parse"></a><span class="summary-sig-name">parse</span>(<span class="summary-sig-arg">file</span>, 109 <span class="summary-sig-arg">beautifulsoup</span>=<span class="summary-sig-default">None</span>, 110 <span class="summary-sig-arg">makeelement</span>=<span class="summary-sig-default">None</span>)</span></td> 141 </td><td class="summary"> 142 <a href="lxml.html.ElementSoup-module.html#__doc__" class="summary-name" onclick="show_private();">__doc__</a> = <code title=""""Legacy interface to the BeautifulSoup HTML parser. 143"""">"""Legacy interface to the BeautifulSoup HTML pars<code class="variable-ellipsis">...</code></code> 179 180 <p>Convert a BeautifulSoup tree to a list of Element trees.</p> 181<p>Returns a list instead of a single root Element to supportbills.py https://github.com/runderwood/openstates.git | Python | 293 lines
bills.py https://github.com/runderwood/openstates.git | Python | 293 lines
  3:
  4: from BeautifulSoup import BeautifulSoup
  5:
  33: with self.urlopen(index_file) as doc:
  34: soup = BeautifulSoup(cleanup_html(doc))
  35:
  123: with self.urlopen(url) as doc:
  124: soup = BeautifulSoup(doc)
  125: date=None

Makefile https://gitlab.com/lokiexinferis/vim-configs | Makefile | 82 lines
  78: virtualenv build/html2vimdoc
  79: build/html2vimdoc/bin/pip install beautifulsoup coloredlogs==4.0 markdown
  80:

tracker.py https://gitlab.com/jan.raddatz/myimmitracker-analyzer | Python | 163 lines
  3: #pip install --proxy proxy:8080 bs4
  4: from bs4 import BeautifulSoup
  5: # pip install --proxy proxy:8080 ansicolors
  80: # r = requests.get(hostname + url_to_scrape, proxies=proxyDict)
  81: soup = BeautifulSoup(r.text, 'html.parser')
  82: all_tables = soup.find_all('table')

module-tree.html https://github.com/jcrobak/hue.git | HTML | 170 lines
  73: <ul>
  74: <li> <strong class="uidlink"><a href="lxml.html.ElementSoup-module.html">lxml.html.ElementSoup</a></strong>: <em class="summary">Legacy interface to the BeautifulSoup HTML parser.</em> </li>
  75: <li class="private"> <strong class="uidlink">lxml.html._dictmixin</strong> </li>
  82: <li> <strong class="uidlink"><a href="lxml.html.html5parser-module.html">lxml.html.html5parser</a></strong>: <em class="summary">An interface to html5lib.</em> </li>
  83: <li> <strong class="uidlink"><a href="lxml.html.soupparser-module.html">lxml.html.soupparser</a></strong>: <em class="summary">External interface to the BeautifulSoup HTML parser.</em> </li>
  84: <li> <strong class="uidlink"><a href="lxml.html.usedoctest-module.html">lxml.html.usedoctest</a></strong>: <em class="summary">Doctest module for HTML comparison.</em> </li>

Makefile https://github.com/freebsd/freebsd-ports.git | Makefile | 90 lines
  49: EXCEL_DESC= MS Excel I/O Add-ons
  50: HTML5LIB_DESC= Parse HTML with www/py-html5lib and www/py-beautifulsoup
  51: HTML_DESC= HTML Parsing/Generation Add-ons
  52: JINJA2_DESC= Support conditional HTML formatting with devel/py-Jinja2
  53: LXML_DESC= Parse HTML with devel/py-lxml and www/py-beautifulsoup
  54: MPL_DESC= Support graphical plotting output via math/py-matplotlib
  72: BTLNCK_RUN_DEPENDS= ${PYTHON_PKGNAMEPREFIX}bottleneck>=1.2.0:math/py-bottleneck@${PY_FLAVOR}
  73: HTML5LIB_RUN_DEPENDS= ${PYTHON_PKGNAMEPREFIX}beautifulsoup>=4.2.1:www/py-beautifulsoup@${PY_FLAVOR} \
  74: ${PYTHON_PKGNAMEPREFIX}html5lib>0:www/py-html5lib@${PY_FLAVOR}
  75: JINJA2_RUN_DEPENDS= ${PYTHON_PKGNAMEPREFIX}Jinja2>0:devel/py-Jinja2@${PY_FLAVOR}
  76: LXML_RUN_DEPENDS= ${PYTHON_PKGNAMEPREFIX}beautifulsoup>=4.2.1:www/py-beautifulsoup@${PY_FLAVOR} \
  77: ${PYTHON_PKGNAMEPREFIX}lxml>0:devel/py-lxml@${PY_FLAVOR}

Parser.py https://gitlab.com/Fremis/IRCourse | Python | 276 lines
  7:
  8: from bs4 import BeautifulSoup, NavigableString
  9: import gc
  74: try:
  75: soup = BeautifulSoup(open_file, 'lxml')
  76: except UnicodeDecodeError:

testFunctional.py https://github.com/bogtan/Naaya.git | Python | 221 lines
  22: from copy import deepcopy
  23: from BeautifulSoup import BeautifulSoup
  24:
  153: html = self.browser.get_html()
  154: soup = BeautifulSoup(html)
  155:

primewire.py https://bitbucket.org/Leia18/gmc.git | Python | 207 lines
  5:
  6: from BeautifulSoup import BeautifulSoup
  7: from universalscrapers import proxy
  28: try:
  29: html = BeautifulSoup(self.get_html(title, self.moviesearch_link))
  30: index_items = html.findAll('div', attrs={'class': 'index_item index_item_ie'})
  68: try:
  69: html = BeautifulSoup(self.get_html(title, self.tvsearch_link))
  70: index_items = html.findAll('div', attrs={'class': re.compile('index_item.+?')})
  89: show_url = urlparse.urljoin(self.base_link, href)
  90: html = BeautifulSoup(proxy.get(show_url, 'tv_episode_item'))
  91:
  123: html = proxy.get(url, 'searchform')
  124: parsed_html = BeautifulSoup(html)
  125: key = parsed_html.findAll('input', attrs={'name': 'key'})[0]["value"]

citotron.py https://gitlab.com/maxigas/citotron.git | Python | 265 lines
  8: from args import args
  9: from bs4 import BeautifulSoup as bs
  10: from collections import Counter as counter

HelpIndex.py https://github.com/esitarski/CrossMgr.git | Python | 98 lines
  9: import re
  10: from bs4 import BeautifulSoup
  11:
  45: for f in glob.iglob( os.path.join(htmlDocDir, '*.html') ):
  46: doc = BeautifulSoup( open(f).read(), 'html.parser' )
  47: div = doc.find('div', class_='content')

__init__.py https://github.com/theduke/sehistory.git | Python | 238 lines
  2:
  3: from libraries.BeautifulSoup import BeautifulSoup
  4:
  152: def extractLogo(self, html):
  153: soup = BeautifulSoup(html)
  154:

_html5lib.py https://gitlab.com/eientei95/crunchy-xml-decoder | Python | 221 lines
  82: def fragmentClass(self):
  83: self.soup = BeautifulSoup("")
  84: self.soup.name = "[document_fragment]"

filter.py https://gitlab.com/cobhuni/hadith_alislam_extractor | Python | 197 lines
  31: from argparse import ArgumentParser
  32: from bs4 import BeautifulSoup
  33: import multiprocessing as mp
  68: Args:
  69: t (class 'bs4.BeautifulSoup'): html to parse
  70: fn (str): filename, to trace errors.
  72: Returns:
  73: class 'bs4.BeautifulSoup': cell containing the text
  74:
  117: with open(os.path.join(args.input_dir, fname)) as inf:
  118: soup = BeautifulSoup(inf.read(),'lxml')
  119:

requirements_txt_linker_spec.rb https://gitlab.com/YarNayar/gitlab-ce | Ruby | 95 lines
  28: nose-cov
  29: beautifulsoup4
  30: #
  70: expect(subject).to include(link('nose-cov', 'https://pypi.python.org/pypi/nose-cov'))
  71: expect(subject).to include(link('beautifulsoup4', 'https://pypi.python.org/pypi/beautifulsoup4'))
  72: expect(subject).to include(link('docopt', 'https://pypi.python.org/pypi/docopt'))

imo.py https://gitlab.com/rithvikvibhu/batch-sof | Python | 71 lines
  3: import pprint
  4: from bs4 import BeautifulSoup
  5:
  47:
  48: soup = BeautifulSoup(r.text, "html5lib") # Soup up html
  49: table_data = [[cell.text for cell in row("td")]
  50: for row in BeautifulSoup(r.text, "html5lib")("tr")]
  51:

prototype_to_cix.py https://gitlab.com/Smileyt/KomodoEdit | Python | 295 lines
  48: Requirements:
  49: * BeautifulSoup (http://www.crummy.com/software/BeautifulSoup/)
  50: * cElementTree (http://effbot.org/downloads/#cElementTree)
  65:
  66: from BeautifulSoup import BeautifulSoup, NavigableString
  67:
  260: data = getPrototypeDocsFromWebpage()
  261: soup = BeautifulSoup(data)
  262: cix_root = createCixRoot(name="Prototype", description="JavaScript framework for web development")

settings.py https://github.com/knabar/fynbos.git | Python | 64 lines
  50: # the backend to use when parsing the JavaScript or Stylesheet files
  51: PARSER = getattr(settings, 'COMPRESS_PARSER', 'compressor.parser.BeautifulSoupParser')
  52:

test_archives.py https://gitlab.com/Acidburn0zzz/hyperkitty | Python | 341 lines
  33: from mock import Mock
  34: from bs4 import BeautifulSoup
  35: from django.contrib.auth.models import User
  317: """
  318: soup = BeautifulSoup(html, "html.parser")
  319: months_list = soup.find(id="months-list")

toc.py https://gitlab.com/janninematt/janninematt | Python | 145 lines
  12:
  13: from bs4 import BeautifulSoup, Comment
  14:
  119: tree = node = HtmlTreeNode(None, title, 'h0', '')
  120: soup = BeautifulSoup(content._content, 'html.parser')
  121: settoc = False
  137: tree_string = '{}'.format(tree)
  138: tree_soup = BeautifulSoup(tree_string, 'html.parser')
  139: content.toc = tree_soup.decode(formatter='html')

get_legislation.py https://github.com/BRIMIL01/fiftystates.git | Python | 221 lines
  15: soup_parser = html5lib.HTMLParser(
  16: tree=html5lib.treebuilders.getTreeBuilder('beautifulsoup')).parse
  17:

__init__.py https://bitbucket.org/rattray/popcorn-portal.git | Python | 355 lines
  3: "The Screen-Scraper's Friend"
  4: http://www.crummy.com/software/BeautifulSoup/
  5:
  15: documentation:
  16: http://www.crummy.com/software/BeautifulSoup/bs4/doc/
  17: """
  23:
  24: __all__ = ['BeautifulSoup']
  25:
  48:
  49: class BeautifulSoup(Tag):
  50: """
  91: "BS4 does not respect the convertEntities argument to the "
  92: "BeautifulSoup constructor. Entities are always converted "
  93: "to Unicode characters.")

build.py https://gitlab.com/imbest91/grapejuice | Python | 537 lines
  16: import yaml
  17: from bs4 import BeautifulSoup
  18: from jinja2 import Environment, FileSystemLoader, select_autoescape
  261:
  262: md_soup = BeautifulSoup(rendered_markdown, "lxml")
  263: summarizer = Summarizer(break_pads=["[summary-snip]"])
  357:
  358: soup = BeautifulSoup(content, "html5lib")
  359:

default.py https://gitlab.com/billyprice1/husham.com | Python | 280 lines
  10: import requests
  11: from BeautifulSoup import BeautifulSoup as bs
  12: from utils.webutils import *

main.py https://gitlab.com/smidaharoun/devoirTunisiePython | Python | 197 lines
  3:
  4: from bs4 import BeautifulSoup
  5: from flask import Flask, jsonify
  13: page = urllib2.urlopen(main)
  14: soup = BeautifulSoup(page, 'html.parser')
  15: soup.prettify()
  68: page_level = urllib2.urlopen(url_level)
  69: soup_level = BeautifulSoup(page_level, 'html.parser')
  70: soup_level.prettify()
  100: page_level = urllib2.urlopen(url_level)
  101: soup_level = BeautifulSoup(page_level, 'html.parser')
  102: soup_level.prettify()
  132: page_level = urllib2.urlopen(url_level)
  133: soup_level = BeautifulSoup(page_level, 'html.parser')
  134: soup_level.prettify()

editor.py https://gitlab.com/dannywillems/geeknote | Python | 259 lines
  5: import tempfile
  6: from bs4 import BeautifulSoup, NavigableString
  7: import threading
  56:
  57: # soup.select cant be used with dashes: https://bugs.launchpad.net/beautifulsoup/+bug/1276211
  58: for todo in soup.find_all('en-todo'):
  78: def ENMLtoText(contentENML):
  79: soup = BeautifulSoup(contentENML.decode('utf-8'))
  80:
  115: '''
  116: Transforms github style checklists `* [ ]` in the BeautifulSoup tree to
  117: enml.
  172:
  173: soup = BeautifulSoup(contentHTML, 'html.parser')
  174: Editor.checklistInSoupToENML(soup)

testFunctional.py https://github.com/eaudeweb/Naaya.git | Python | 151 lines
  1: import re
  2: from BeautifulSoup import BeautifulSoup
  3:
  141: html = self.browser.get_html()
  142: soup = BeautifulSoup(html)
  143:

build.py https://code.google.com/p/python-for-android/ | Python | 204 lines
  105: print 'Installing BeautifulSoup.'
  106: beautifulsoup_path = os.path.join(pwd, 'python-libs','BeautifulSoup')
  107: compileall.compile_dir(beautifulsoup_path)
  108: shutil.copy(os.path.join(beautifulsoup_path, 'BeautifulSoup.pyc'),
  109: 'output/usr/lib/python2.6/BeautifulSoup.pyc')

README.rst https://github.com/liberation/django_compressor.git | ReStructuredText | 71 lines
  36: is done using lxml_ or if it's not available Python's built-in HTMLParser by
  37: default. As an alternative Django Compressor provides a BeautifulSoup_ and a
  38: html5lib_ based parser, as well as an abstract base class that makes it easy to
  58:
  59: .. _BeautifulSoup: http://www.crummy.com/software/BeautifulSoup/
  60: .. _lxml: http://lxml.de/

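Read together with the fynbos settings.py hit above, switching Django Compressor to its BeautifulSoup backend is a one-line setting; a hedged sketch (the dotted path matches the default seen in that hit, but may vary by Django Compressor version):

    # settings.py
    COMPRESS_PARSER = 'compressor.parser.BeautifulSoupParser'
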
PROJECT_IDEAS.md https://gitlab.com/ini-python-lab-course/ss16 | Markdown | 126 lines
  49: * [import.io](https://import.io/): Service that extracts data from websites
  50: * [BeautifulSoup](http://www.crummy.com/software/BeautifulSoup/): Convenient access to content of a downloaded website
  51: * [Scrapy](http://scrapy.org/): Framework for scraping websites

single.py https://gitlab.com/skororu/pysnippets | Python | 67 lines
  8:
  9: import bs4 # BeautifulSoup
  10: import requests # codes.ok, get
  29: req = requests.get(url)
  30: page = bs4.BeautifulSoup(req.text, 'lxml')
  31:
  45: with requests.get(base_url) as req:
  46: page = bs4.BeautifulSoup(req.text, 'lxml')
  47: num_previous = page.find('a', rel='prev')['href']

independent.py https://gitlab.com/harrigan/TPP | Python | 36 lines
  5: from crimespider.items import CrimeItem
  6: from bs4 import BeautifulSoup
  7:
  30: article += c.extract()
  31: s = BeautifulSoup(article, 'lxml')
  32: print( s.get_text() )

urls.html https://github.com/msparks/pyhole.git | HTML | 155 lines
  64:
  65: <span class="kn">from</span> <span class="nn">BeautifulSoup</span> <span class="kn">import</span> <span class="n">BeautifulSoup</span>
  105:
  106: <span class="n">soup</span> <span class="o">=</span> <span class="n">BeautifulSoup</span><span class="p">(</span><span class="n">response</span><span class="o">.</span><span class="n">read</span><span class="p">())</span>
  107:

urlnorm.py https://github.com/e1ven/Lonava.git | Python | 256 lines
  28: import urllib2
  29: from BeautifulSoup import BeautifulSoup
  30: import socket
  69: try:
  70: soup = BeautifulSoup(html)
  71: links = soup.findAll('link')

geoserver.py https://github.com/dotskapes/dotSkapes.git | Python | 104 lines
  2: from urllib2 import urlopen
  3: from BeautifulSoup import BeautifulStoneSoup
  4:

version_check.py https://gitlab.com/mimizone/kolla | Python | 126 lines
  19:
  20: from bs4 import BeautifulSoup as bs
  21: from oslo_config import cfg

readability.py https://gitlab.com/zouxc/cola | Python | 368 lines
  31: except ImportError:
  32: raise DependencyNotInstalledError("BeautifulSoup4")
  33:

bugzilla_unittest.py https://github.com/weissms/owb-mirror.git | Python | 296 lines
  34:
  35: from modules.BeautifulSoup import BeautifulSoup
  36:
  204: bugzilla = Bugzilla()
  205: soup = BeautifulSoup(self._example_attachment)
  206: attachment_element = soup.find("attachment")

README.rst https://gitlab.com/gallaecio/chakraversiontracker | ReStructuredText | 245 lines
  18:
  19: - `beautifulsoup4 <https://www.crummy.com/software/BeautifulSoup/bs4/doc/>`_
  20:

helper.py https://github.com/macdylan/LBForum.git | Python | 44 lines
  2: # -*- coding: UTF-8 -*-
  3: from BeautifulSoup import BeautifulSoup, NavigableString
  4: from django.conf import settings
  31: def clean_html( fragment ):
  32: soup = BeautifulSoup( fragment.strip() )
  33: def cleanup( soup ):

generate_featured_pages.py https://github.com/pcdinh/trendingtopics.git | Python | 153 lines
  13: import urllib2
  14: from BeautifulSoup import BeautifulSoup
  15: import datetime
  65: page = opener.open( url ).read()
  66: soup = BeautifulSoup(page)
  67: return soup

faq.rst https://gitlab.com/oytunistrator/scrapy | ReStructuredText | 286 lines
  5:
  6: How does Scrapy compare to BeautifulSoup or lxml?
  7: -------------------------------------------------
  8:
  9: `BeautifulSoup`_ and `lxml`_ are libraries for parsing HTML and XML. Scrapy is
  10: an application framework for writing web spiders that crawl web sites and
  13: Scrapy provides a built-in mechanism for extracting data (called
  14: :ref:`selectors <topics-selectors>`) but you can easily use `BeautifulSoup`_
  15: (or `lxml`_) instead, if you feel more comfortable working with them. After
  18:
  19: In other words, comparing `BeautifulSoup`_ (or `lxml`_) to Scrapy is like
  20: comparing `jinja2`_ to `Django`_.
  21:
  22: .. _BeautifulSoup: http://www.crummy.com/software/BeautifulSoup/
  23: .. _lxml: http://lxml.de/

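A minimal sketch of the mix-and-match this FAQ describes: a Scrapy spider whose callback hands the response body to BeautifulSoup instead of Scrapy's selectors (spider name, URL, and the h2 selector are illustrative):

    import scrapy
    from bs4 import BeautifulSoup

    class BlogSpider(scrapy.Spider):
        name = "blog"
        start_urls = ["https://example.com/blog"]

        def parse(self, response):
            # BeautifulSoup stands in for response.css()/response.xpath() here
            soup = BeautifulSoup(response.text, "html.parser")
            for title in soup.find_all("h2"):
                yield {"title": title.get_text(strip=True)}
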
ba.py https://github.com/barttenbrinke/Bartsidee-Repository.git | Python | 328 lines
  13: import cPickle as pickle
  14: from beautifulsoup.BeautifulSoup import BeautifulSoup
  15:
  125: data = FetchUrl(samiurl, 0)
  126: soup = BeautifulSoup(data, convertEntities="xml", smartQuotesTo="xml")
  127: i = 1
  164: data = FetchUrl(path)
  165: soup = BeautifulSoup(data, convertEntities="xml", smartQuotesTo="xml")
  166: i = 1

soup.py https://github.com/scottjasta/Places.git | Python | 228 lines
  2:
  3: warnings.warn("BeautifulSoup 3.x (as of 3.1) is not fully compatible with html5lib and support will be removed in the future", DeprecationWarning)
  4:
  5: from BeautifulSoup import BeautifulSoup, Tag, NavigableString, Comment, Declaration
  6:
  140: if namespaceHTMLElements:
  141: warnings.warn("BeautifulSoup cannot represent elements in any namespace", DataLossWarning)
  142: _base.TreeBuilder.__init__(self, namespaceHTMLElements)
  144: def documentClass(self):
  145: self.soup = BeautifulSoup("")
  146: return Element(self.soup, self.soup, None)
  162: if namespace is not None:
  163: warnings.warn("BeautifulSoup cannot represent elements in any namespace", DataLossWarning)
  164: return Element(Tag(self.soup, name), self.soup, namespace)

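The DeprecationWarning in soup.py is html5lib's old BeautifulSoup 3 treebuilder; in the bs4 era the direction is reversed, with BeautifulSoup driving html5lib as its parser. A hedged sketch (requires beautifulsoup4 and html5lib):

    from bs4 import BeautifulSoup

    soup = BeautifulSoup("<p>unclosed", "html5lib")  # html5lib repairs the markup
    print(soup.p.get_text())                         # -> unclosed
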
ultimate.py https://gitlab.com/eientei95/crunchy-xml-decoder | Python | 351 lines
  18: import altfuncs
  19: from bs4 import BeautifulSoup
  20: from crunchyDec import CrunchyDec
  231: media_id = page_url[-6:]
  232: xmlconfig = BeautifulSoup(altfuncs.getxml('RpcApiVideoPlayer_GetStandardConfig', media_id), 'xml')
  233:
  249: media_id = xmlconfig.find('media_id').string
  250: xmlconfig = BeautifulSoup(altfuncs.getxml('RpcApiVideoEncode_GetStreamInfo', media_id), 'xml')
  251: host = xmlconfig.find('host').string

models.py https://github.com/agiliq/Dinette.git | Python | 375 lines
  10: import hashlib
  11: from BeautifulSoup import BeautifulSoup
  12: import datetime
  260: def htmlfrombbcode(self):
  261: soup = BeautifulSoup(self.message.raw)
  262: #remove all html tags from the message

adapter_twcslibrarynet.py https://code.google.com/p/fanficdownloader/ | Python | 273 lines
  25:
  26: from .. import BeautifulSoup as bs
  27: from ..htmlcleanup import stripHTML
  136:
  137: # use BeautifulSoup HTML parser to make everything easier to find.
  138: soup = bs.BeautifulSoup(data)
  227:
  228: # use BeautifulSoup HTML parser to make everything easier to find.
  229: seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url))
  252:
  253: chapter=bs.BeautifulSoup('<div class="story"></div>')
  254:
  255: soup = bs.BeautifulSoup(data)
  256:

yes24_script.py https://github.com/jangxyz/yes24.git | Python | 193 lines
  4: import urllib, urllib2, cookielib
  5: from BeautifulSoup import BeautifulSoup
  6: from datetime import datetime
  77: # parse
  78: soup = BeautifulSoup(text)
  79: order_list_table = soup.table(id="MyOrderListTbl")[0]
  125: # parse
  126: soup = BeautifulSoup(text)
  127: order_price = soup.find(id="CLbTotOrdAmt").b.string
  129: text = '<table>' + text[text[1:].find('<')+1:-7] + '</table>'
  130: soup = BeautifulSoup(text)
  131: point_saved = soup.find(attrs={'class':"price"}).b.string

zad_7.py https://gitlab.com/mmeisel/LV | Python | 43 lines
  8: import urllib
  9: from bs4 import BeautifulSoup
  10:
  26: html=urllib.urlopen(urlAddr, "lxml").read() # opens the url
  27: soup=BeautifulSoup(html) # and declares an object of type BeautifulSoup
  28:

_lxml.py https://bitbucket.org/bendikro/deluge-yarss-plugin.git | Python | 296 lines
  60: def initialize_soup(self, soup):
  61: """Let the BeautifulSoup object know about the standard namespace
  62: mapping.
  67: def _register_namespaces(self, mapping):
  68: """Let the BeautifulSoup object know about namespaces encountered
  69: while parsing the document.
  74: if key and key not in self.soup._namespaces:
  75: # Let the BeautifulSoup object know about a new namespace.
  76: # If there are multiple namespaces defined with the same
  189:
  190: # First, Let the BeautifulSoup object know about it.
  191: self._register_namespaces(nsmap)

nrlbot.py https://gitlab.com/g.davis13/nrlbot | Python | 344 lines
  35:
  36: from bs4 import BeautifulSoup
  37: from collections import namedtuple
  100: r = requests.get(url)
  101: soup = BeautifulSoup(r.text, 'html.parser')
  102: return soup

__init__.py https://bitbucket.org/yourcelf/old-intertwinkles.git | Python | 163 lines
  19: from django_browserid import get_audience
  20: from bs4 import BeautifulSoup
  21:
  93: self.assertTrue("test@example.com" in res.content)
  94: soup = BeautifulSoup(res.content)
  95: self.assertEquals(soup.find(id="id_email").get("value"),
  104: self.assertFalse("This address is unconfirmed" in res.content)
  105: soup = BeautifulSoup(res.content)
  106: self.assertEquals(soup.find(id="id_email").get("value"),

conversation.py https://gitlab.com/sanchezfauste/TweetDigraph | Python | 108 lines
  1: from bs4 import BeautifulSoup
  2: import requests
  95: if req.status_code == 200:
  96: html = BeautifulSoup(req.text, 'html.parser')
  97: conversations = html.find_all('li', {'class':'ThreadedConversation'})

base_fetcher.py https://bitbucket.org/filmaster/filmaster-stable/ | Python | 193 lines
  3:
  4: from beautifulsoup import BeautifulSoup
  5: from optparse import make_option
  19:
  20: BeautifulSoup.MARKUP_MASSAGE += [
  21: (re.compile(r"<[^>]+>"), lambda tag:quote_re.sub(r"\1 \2", tag.group(0))),
  65: def soup(self, data):
  66: return BeautifulSoup(data)
  67:

scrape.py https://github.com/sneeu/aliss_scrapers.git | Python | 113 lines
  7:
  8: from BeautifulSoup import BeautifulSoup
  9:
  32: html = re.sub('<script.*?>[\s\S]*?</.*?script>', '', html)
  33: soup = BeautifulSoup(html)
  34:

index.html https://github.com/larsks/blog.oddbit.com.git | HTML | 240 lines
  6: <title>Recent answers on StackOverflow · The Odd Bit</title>
  7: <meta name="description" content="Traefik different ports for different Docker containers docker docker-compose traefik git push can not find -o option git Interact with podman docker via socket in Redhat 9 docker redhat podman Capturing commented text in an XML python xml beautifulsoup xml-comments How to execute a shell script as input on an interactive bash pod in Kubernetes? bash shell kubernetes Docker : Opensearch refuses connection with the example in opensearch documentation in docker python-3.">
  8: <meta name="HandheldFriendly" content="True">
  142: <li><p><a class="sx-answer sx-answer-accepted" href="https://stackoverflow.com/questions/72681436/capturing-commented-text-in-an-xml/72681822#72681822">Capturing commented text in an XML</a>
  143: <a class="sx-tag" href="https://stackoverflow.com/questions/tagged/python">python</a> <a class="sx-tag" href="https://stackoverflow.com/questions/tagged/xml">xml</a> <a class="sx-tag" href="https://stackoverflow.com/questions/tagged/beautifulsoup">beautifulsoup</a> <a class="sx-tag" href="https://stackoverflow.com/questions/tagged/xml-comments">xml-comments</a> </p></li>
  144:

bills.py https://github.com/runderwood/openstates.git | Python | 138 lines
  5: import urllib2
  6: from BeautifulSoup import BeautifulSoup
  7:
  12:
  13: It includes a spurious </HEAD> before the useful data begins and lines like '<option value="Bill"selected="selected">Bill</option>', in which the lack of a space between the attributes confuses BeautifulSoup.
  14: '''
  69: return False
  70: soup = BeautifulSoup(cleansource(data))
  71: rows = soup.findAll('table')[1].findAll('tr')[1:]
  91: with self.urlopen(histurl) as data:
  92: soup = BeautifulSoup(cleansource(data))
  93: basicinfo = soup.findAll('div', id='bhistleft')[0]

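The docstring above names exactly the markup quirks that trip BeautifulSoup 3. The scraper's real cleansource() is not shown in the hit, so this pre-clean is an illustrative sketch only:

    import re

    def cleansource(data):
        # drop the spurious </HEAD> before the useful data begins
        data = data.replace('</HEAD>', '', 1)
        # re-space fused attributes such as '<option value="Bill"selected="selected">'
        return re.sub(r'"(?=[A-Za-z-]+=")', '" ', data)
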
lequipe_fr.py https://gitlab.com/edelans/scoragora | Python | 180 lines
  55: html = requests.get(url).text
  56: soup = BeautifulSoup.BeautifulSoup(html)
  57: # Get date
  85: html = requests.get(day_url).text
  86: soup = BeautifulSoup.BeautifulSoup(html)
  87: result = []
  119: html = requests.get(url).text
  120: soup = BeautifulSoup.BeautifulSoup(html)
  121: root = soup.find(attrs={'name': 'IDNIVEAU'})
  147: html = requests.get(url).text
  148: soup = BeautifulSoup.BeautifulSoup(html)
  149: soup.find(id="timeline")
  169: html = requests.get(url).text
  170: soup = BeautifulSoup.BeautifulSoup(html)
  171: hometeam_score = soup.find(id='scoDom')

tpb.py https://github.com/SpLord/CouchPotato.git | Python | 160 lines
  4: from dateutil.parser import parse
  5: from imdb.parser.http.bsouplxml._bsoup import SoupStrainer, BeautifulSoup
  6: from urllib import quote_plus
  63: tables = SoupStrainer('table')
  64: html = BeautifulSoup(data, parseOnlyThese = tables)
  65: resultTable = html.find('table', attrs = {'id':'searchResult'})
  154: div = SoupStrainer('div')
  155: html = BeautifulSoup(data, parseOnlyThese = div)
  156: html = html.find('div', attrs = {'class':'nfo'})

searchengine.py https://github.com/kzfm1024/misc.git | Python | 306 lines
  1: import urllib2
  2: from BeautifulSoup import *
  3: from urlparse import urljoin
  106: try:
  107: soup=BeautifulSoup(c.read())
  108: self.addtoindex(page,soup)

__init__.py https://bitbucket.org/katey_hack/kindle-touch-l10n.git | Python | 96 lines
  40: treeType - the name of the tree type required (case-insensitive). Supported
  41: values are "simpletree", "dom", "etree" and "beautifulsoup"
  51: ElementTree, cElementTree and lxml.etree).
  52: "beautifulsoup" - Beautiful soup (if installed)
  70: treeBuilderCache[treeType] = simpletree.TreeBuilder
  71: elif treeType == "beautifulsoup":
  72: import soup

KitMensaService.py https://gitlab.com/namboy94/messengerbot | Python | 230 lines
  26: import requests
  27: from bs4 import BeautifulSoup
  28: from typing import Tuple
  162: html = requests.get(url).text
  163: soup = BeautifulSoup(html, "html.parser")
  164: resource = soup.select('body')

2010-11-21-exploring_art_data_3.md https://gitlab.com/rheaplex/robmyers.org | Markdown | 172 lines
  23: <tt>#!/usr/bin/python
  24: from BeautifulSoup import BeautifulStoneSoup
  25: import re

test_markdown_to_html.py https://gitlab.com/Ivy001/pants | Python | 186 lines
  173:
  174: soup = bs4.BeautifulSoup(markup=html)
  175: self.assertIsNotNone(soup.find(text='A good link:'))

utils.py https://github.com/Gautier/django-page-cms.git | Python | 139 lines
  119: return content
  120: from BeautifulSoup import BeautifulSoup
  121: tree = BeautifulSoup(content)

util.py https://gitlab.com/Lett1/SlackDuckBot | Python | 151 lines
  5: import re
  6: from bs4 import BeautifulSoup
  7: from urllib.request import Request, urlopen
  113: else:
  114: soup = BeautifulSoup(html, "lxml")
  115: if soup.title is not None:

release.py https://gitlab.com/LocutusOfPenguin/python-chess | Python | 178 lines
  141: sys.exit(1)
  142: soup = bs4.BeautifulSoup(res.text, "html.parser")
  143: csrf = soup.find("input", {"name": "CSRFToken"})["value"]

types.py https://github.com/rxuriguera/bibtexIndexMaker.git | Python | 87 lines
  37: self.msg attribute contains explanation why parsing failed
  38: self.tag attribute contains BeautifulSoup object with the most relevant tag
  39: that failed to parse

post.py https://github.com/mw44118/blogofile.git | Python | 309 lines
  22: import logging
  23: import BeautifulSoup
  24:
  138: """Retrieve excerpt from article"""
  139: s = BeautifulSoup.BeautifulSoup(self.content)
  140: # get rid of javascript, noscript and css

setup.py https://github.com/eged/django-blog-zinnia.git | Python | 37 lines
  33: 'akismet',
  34: 'BeautifulSoup',
  35: ])

inject.py https://gitlab.com/BoTranVan/MITMf | Python | 195 lines
  23:
  24: from bs4 import BeautifulSoup
  25: from plugins.plugin import Plugin
  86: if encoding is not None:
  87: html = BeautifulSoup(data.decode(encoding, "ignore"), "lxml")
  88: else:
  89: html = BeautifulSoup(data, "lxml")
  90:
  98: if self.html_payload:
  99: payload = BeautifulSoup(self.html_payload, "html.parser")
  100: html.body.append(payload)
  104: with open(self.html_file, 'r') as file:
  105: payload = BeautifulSoup(file.read(), "html.parser")
  106: html.body.append(payload)

share_post.py https://gitlab.com/janninematt/janninematt | Python | 81 lines
  8:
  9: from bs4 import BeautifulSoup
  10: try:
  18: def article_title(content):
  19: main_title = BeautifulSoup(content.title, 'html.parser').get_text().strip()
  20: sub_title = ''
  21: if hasattr(content, 'subtitle'):
  22: sub_title = ' ' + BeautifulSoup(content.subtitle, 'html.parser').get_text().strip()
  23: return quote(('%s%s' % (main_title, sub_title)).encode('utf-8'))
  31: def article_summary(content):
  32: return quote(BeautifulSoup(content.summary, 'html.parser').get_text().strip().encode('utf-8'))
  33:

cablemodem_check.py https://gitlab.com/mikeos2/Nagios_Plugins | Python | 223 lines
  38: try:
  39: from bs4 import BeautifulSoup
  40: except ImportError:
  41: print "Error: (" + str(Nagios_UNKNOWN) + ") install BeautifulSoup!"
  42: sys.exit(Nagios_UNKNOWN)
  69:
  70: return BeautifulSoup(page)
  71:

subtitle-downloader.py https://gitlab.com/132nd-etcher/subtitle-downloader | Python | 124 lines
  18: import requests,time,re,zipfile
  19: from bs4 import BeautifulSoup
  20: PY_VERSION = sys.version_info[0]
  74: r=requests.get("http://subscene.com/subtitles/release?q="+root);
  75: soup=BeautifulSoup(r.content,"lxml")
  76: atags=soup.find_all("a")
  83: r=requests.get("http://subscene.com"+href);
  84: soup=BeautifulSoup(r.content,"lxml")
  85: lin=soup.find_all('a',attrs={'id':'downloadButton'})[0].get("href")
  86: r=requests.get("http://subscene.com"+lin);
  87: soup=BeautifulSoup(r.content,"lxml")
  88: subfile=open(root2+".zip", 'wb')

base.py https://github.com/tallstreet/jaikuenginepatch.git | Python | 209 lines
  17:
  18: from beautifulsoup import BeautifulSoup
  19:
  193: self.assertWellformed(response)
  194: parsed = BeautifulSoup.BeautifulSoup(response.content)
  195: found = parsed.findAll('a', attrs = { 'class': link_class})

get_legislation.py https://github.com/BRIMIL01/fiftystates.git | Python | 117 lines
  14: state = 'nc'
  15: soup_parser = html5lib.HTMLParser(tree=html5lib.treebuilders.getTreeBuilder('beautifulsoup')).parse
  16:

Weather.py https://gitlab.com/leiftomas/jasper-client | Python | 172 lines
  43: 'international_cities.asp')
  44: soup = bs4.BeautifulSoup(r.text)
  45: data = soup.find(id="inner-content").find('pre').string

baseparser.py https://gitlab.com/andyblaesus/newsdiffs | Python | 156 lines
  52: # Ick.
  53: from BeautifulSoup import BeautifulSoup
  54: def bs_fixed_getText(self, separator=u""):
  55: bsmod = sys.modules[BeautifulSoup.__module__]
  56: if not len(self.contents):
  65: return separator.join(strings)
  66: sys.modules[BeautifulSoup.__module__].Tag.getText = bs_fixed_getText
  67: # End fix
  111:
  112: feeder_bs = BeautifulSoup #use this version of beautifulsoup for feed
  113:

fileops.py https://gitlab.com/j000sh/hackerrank-to-git | Python | 136 lines
  6: from pprint import pprint
  7: from bs4 import BeautifulSoup
  8: from sh import git
  87: with open(filename, 'w') as f:
  88: f.write(BeautifulSoup(html, 'html5lib').prettify() + '\n')
  89: gitCommitModel(contest['model'], filename, 'contest created: ' + model['slug'])
  105: with open(filename, 'w') as f:
  106: f.write(BeautifulSoup(html, "html5lib").prettify() + "\n")
  107: gitCommitModel(challenge, filename, 'challenge created: ' + challenge['slug'])

plugin.py https://github.com/lbjay/supybot-plugins.git | Python | 47 lines
  9:
  10: from BeautifulSoup import BeautifulSoup
  11:
  23:
  24: soup = BeautifulSoup(doc)
  25: dd = soup.find('dd', 'highlight')

wikipediaidevice.py https://github.com/RoDaniel/featurehouse.git | Python | 211 lines
  4: import re
  5: from exe.engine.beautifulsoup import BeautifulSoup
  6: from exe.engine.idevice import Idevice
  73: page = page.replace(u' ', u' ')
  74: soup = BeautifulSoup(page, False)
  75: content = soup.first('div', {'id': "content"})

sitegen.py https://gitlab.com/Ivy001/pants | Python | 374 lines
  36: import bs4
  37: return bs4.BeautifulSoup(*args, **kwargs)
  38:
  70: def load_soups(config):
  71: """Generate BeautifulSoup AST for each page listed in config."""
  72: soups = {}

phew.py https://gitlab.com/fnaticshank/crawler | Python | 327 lines
  13: import requests
  14: from bs4 import BeautifulSoup
  15:
  213: errors="replace")
  214: soup = BeautifulSoup(content, "lxml")
  215: tags = soup('a')

test_pipreqs.py https://gitlab.com/Kravcenko/pipreqs | Python | 188 lines
  23: 'peewee', 'ujson', 'nonexistendmodule', 'bs4', 'after_method_is_valid_even_if_not_pep8' ]
  24: self.modules2 = ['beautifulsoup4']
  25: self.local = ["docopt", "requests", "nose", 'pyflakes']

layouttestresults.py https://gitlab.com/x33n/phantomjs | Python | 91 lines
  31: from webkitpy.common.net.resultsjsonparser import ResultsJSONParser
  32: from webkitpy.thirdparty.BeautifulSoup import BeautifulSoup, SoupStrainer
  33: from webkitpy.layout_tests.models import test_results

__init__.py https://github.com/oesmith/django-css.git | Python | 316 lines
  3: import subprocess
  4: from BeautifulSoup import BeautifulSoup
  5: from tempfile import NamedTemporaryFile
  54: self.split_content = []
  55: self.soup = BeautifulSoup(self.content)
  56: self.xhtml = xhtml
  268: basename = os.path.splitext(os.path.basename(filename))[0]
  269: elem = BeautifulSoup(re.sub(basename+ext,basename+'.css',unicode(elem)))
  270: filename = path + '.css'

accelometer_metadata_creator.py https://gitlab.com/heavelock/metadata_creator | Python | 198 lines
  1: from bs4 import BeautifulSoup
  2: import scipy.io
  9: def parse_event_names(file):
  10: soup = BeautifulSoup(open(file), 'lxml-xml')
  11: parsed_meta = {}

testFunctional.py https://github.com/eaudeweb/Naaya.git | Python | 190 lines
  4: from StringIO import StringIO
  5: from BeautifulSoup import BeautifulSoup
  6: from mock import patch
  180: html = self.browser.get_html()
  181: soup = BeautifulSoup(html)
  182:

shotchart_cbssports.py https://github.com/kpascual/nbascrape.git | Python | 127 lines
  6: import logging
  7: from BeautifulSoup import BeautifulSoup
  8: from libscrape.config import constants

Makefile https://github.com/freebsd/freebsd-ports.git | Makefile | 34 lines
  17: ${PYTHON_PKGNAMEPREFIX}psutil>=2.0:sysutils/py-psutil@${PY_FLAVOR}
  18: RUN_DEPENDS= ${PYTHON_PKGNAMEPREFIX}beautifulsoup>=4.2.1:www/py-beautifulsoup@${PY_FLAVOR} \
  19: ${PYTHON_PKGNAMEPREFIX}importlib-metadata>0:devel/py-importlib-metadata@${PY_FLAVOR} \

pingback.py https://github.com/aparo/django-blog-zinnia.git | Python | 141 lines
  17: from zinnia.settings import PINGBACK_CONTENT_LENGTH
  18: from BeautifulSoup import BeautifulSoup
  19: from django_xmlrpc.decorators import xmlrpc_func
  94:
  95: soup = BeautifulSoup(document)
  96: title = soup.find('title')

1.4.9-0004-Fix-3608-Replace-discogs-client-with-python3-discogs.patch https://gitlab.com/redcore/portage | Patch | 134 lines
  71: - tests_require=[
  72: - 'beautifulsoup4',
  73: - 'flask',
  88: + 'test': [
  89: + 'beautifulsoup4',
  90: + 'coverage',

setup.py https://gitlab.com/simont3/awftp | Python | 117 lines
  91: # https://packaging.python.org/en/latest/technical.html#install-requires-vs-requirements-files
  92: install_requires=['requests>=2.13.0', 'click>=6.7', 'beautifulsoup4>=4.6.0'],
  93:

jitsimeetbridge.py https://gitlab.com/JigmeDatse/synapse | Python | 260 lines
  15: import grequests
  16: from BeautifulSoup import BeautifulSoup
  17: import json
  144: resps = grequests.map([req])
  145: obj = BeautifulSoup(resps[0].content)
  146: return obj
  163: self.ssrcs = {}
  164: jingleSoup = BeautifulSoup(jingle)
  165: for cont in jingleSoup.iq.jingle.findAll('content'):

adapter_twilightednet.py https://code.google.com/p/fanficdownloader/ | Python | 254 lines
  25:
  26: from .. import BeautifulSoup as bs
  27: from ..htmlcleanup import stripHTML
  128:
  129: # use BeautifulSoup HTML parser to make everything easier to find.
  130: soup = bs.BeautifulSoup(data)
  214:
  215: # use BeautifulSoup HTML parser to make everything easier to find.
  216: seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url))

olympic.py https://gitlab.com/camilo-celis/DB_SQuirreL | Python | 250 lines
  8: import urllib2
  9: from bs4 import BeautifulSoup
  10: import csv
  31: #Create the soup object from the HTML data
  32: self.soup = BeautifulSoup(html_data)
  33:
  40: url = "http://databaseolympics.com/games/gamesyear.htm?g=%s" % link['href'].split('=')[1]
  41: page = BeautifulSoup(urllib2.urlopen(url).read(), from_encoding="iso-8859-1")
  42:
  56: def get_country_data(self, country_url):
  57: page = BeautifulSoup(urllib2.urlopen(country_url).read(), from_encoding="iso-8859-1")
  58:
  83:
  84: page = BeautifulSoup(urllib2.urlopen(url).read(), from_encoding="iso-8859-1")
  85:

download.py https://gitlab.com/qijungu/stock | Python | 271 lines
  3:
  4: from bs4 import BeautifulSoup
  5: from datetime import datetime
  68: r = session.get(hurl)
  69: page = str(BeautifulSoup(r.content, 'lxml'))
  70:
  82: r = session.get(durl)
  83: page = BeautifulSoup(r.content, 'lxml').select_one('p')
  84:
  193: r = session.get(hurl)
  194: page = BeautifulSoup(r.content, 'lxml').select_one('p')
  195: data = page.text.strip()

importer.py https://github.com/gregmalcolm/Bookie.git | Python | 201 lines
  2: from datetime import datetime
  3: from BeautifulSoup import BeautifulSoup
  4: from bookie.models import BmarkMgr
  78:
  79: soup = BeautifulSoup(file_io)
  80: can_handle = False
  90: """Given a file, process it"""
  91: soup = BeautifulSoup(self.file_handle)
  92:
  140: """
  141: soup = BeautifulSoup(file_io)
  142: can_handle = False
  157: """
  158: soup = BeautifulSoup(self.file_handle)
  159: if not soup.contents[0] == "DOCTYPE NETSCAPE-Bookmark-file-1":

nzbclub.py https://gitlab.com/132nd-etcher/CouchPotatoServer | Python | 100 lines
  2:
  3: from bs4 import BeautifulSoup
  4: from couchpotato.core.helpers.encoding import toUnicode
  56: full_description = self.getCache('nzbclub.%s' % item['id'], item['detail_url'], cache_timeout = 25920000)
  57: html = BeautifulSoup(full_description)
  58: nfo_pre = html.find('pre', attrs = {'class': 'nfo'})

redditstories.py https://github.com/tuxcanfly/django-socialnews.git | Python | 249 lines
  18: import datetime
  19: from BeautifulSoup import BeautifulSoup
  20:
  71: stories = []
  72: soup = BeautifulSoup(content)
  73: entries = soup.findAll('div', id=re.compile('entry_.*'))
  187: def _get_next_page(content):
  188: soup = BeautifulSoup(content)
  189: a = soup.find(lambda tag: tag.name == 'a' and tag.string == 'next')

grade-interactive.py https://gitlab.com/rshipp/mines-grading | Python | 194 lines
  14: # Everything should work with Python 2.7+, although I haven't explicitly tested
  15: # it on 3+. The logging/debugging stuff makes use of the BeautifulSoup library
  16: # for parsing HTML (http://www.crummy.com/software/BeautifulSoup), but it's not
  21: import mechanize, getpass, re, time, zipfile, glob, os, subprocess, sys
  22: from bs4 import BeautifulSoup
  23:
  24: # Logging: Write BeautifulSoup HTML to file
  25: def writeHTML(page, filename):
  68: initialResponse = browser.open("https://cs.mcprogramming.com/djintro/entry/")
  69: homeHTML = BeautifulSoup(initialResponse.get_data())
  70: # writeHTML(homeHTML, 'home')
  71:
  72: # Find the login link using BeautifulSoup and follow it
  73: loginLink = homeHTML.find('a', 'mymaillogin')

udacity_crawler.py https://gitlab.com/fnaticshank/crawler | Python | 263 lines
  4: import robotexclusionrulesparser as rerp
  5: from bs4 import BeautifulSoup
  6: from urlparse import urlparse, urljoin
  140: print "[get_page()] Page off limits!"
  141: return BeautifulSoup(""), ""
  142: if url in cache:
  147: content = urllib.urlopen(url).read()
  148: return BeautifulSoup(content), url
  149: except:
  150: return BeautifulSoup(""), ""
  151:

get_legislation.py https://github.com/rshapiro/fiftystates.git | Python | 184 lines
  3: import re
  4: from BeautifulSoup import BeautifulSoup
  5: import datetime as dt
  50: self.log("Getting bill list for %s %s" % (chamber, session))
  51: bill_list = BeautifulSoup(self.urlopen(bill_list_url))
  52:
  59: bill_info_url = "http://www.leg.state.vt.us" + bill_link['href']
  60: info_page = BeautifulSoup(self.urlopen(bill_info_url))
  61:
  109: self.log("Getting bill list for %s %s" % (chamber, session))
  110: bill_list = BeautifulSoup(urllib2.urlopen(bill_list_url, data))
  111:
  117:
  118: info_page = BeautifulSoup(self.urlopen(
  119: "http://www.leg.state.vt.us" + bill_link['href']))

bsoupxpath.py https://github.com/jsmiller84/CouchPotato.git | Python | 394 lines
  3:
  4: This module provides XPath support for BeautifulSoup.
  5:
  28: import string
  29: import _bsoup as BeautifulSoup
  30:
  107: # for an absolute path, start from the root
  108: if not isinstance(node, BeautifulSoup.Tag) \
  109: or (node.name != '[document]'):
  180: """Parse the predicate. Return a callable that can be used to filter
  181: nodes. Update `self.soup_args` to take advantage of BeautifulSoup search
  182: features.
  268: last = node
  269: while (not isinstance(last, BeautifulSoup.NavigableString)) \
  270: and (len(last.contents) > 0):

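bsoupxpath emulates XPath on top of BeautifulSoup trees; for comparison, a hedged sketch of the same kind of query done natively in lxml (markup is illustrative):

    from lxml import html

    doc = html.fromstring("<div id='nav'><a href='/x'>x</a></div>")
    print(doc.xpath("//div[@id='nav']/a/@href"))  # -> ['/x']
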
__init__.py https://bitbucket.org/rattray/popcorn-portal.git | Python | 307 lines
  75:
  76: # The BeautifulSoup class will take feature lists from developers and use them
  77: # to look up builders in this registry.

amf_serializer.rb https://github.com/brownman/flexonrails.git | Ruby | 364 lines
  111:
  112: elsif (value.class.to_s == 'BeautifulSoup')
  113: write_xml(value.to_s)
  159: # I know we can combine this with the last condition, but don't ; the Rexml and Beautiful Soup test is expensive, and for large record sets with many AR its better to be able to skip the next step
  160: elsif value.is_a?(ActiveRecord::Base) # Aryk: this way, we can bypass the "['REXML::Document', 'BeautifulSoup'].include?(value.class.to_s) " operation
  161: write_amf3_object(value)
  162:
  163: elsif ['REXML::Document', 'BeautifulSoup'].include?(value.class.to_s)
  164: write_byte(AMF3_XML)

convert_notebooks_to_html_partial.py https://gitlab.com/dibya/textbook-tools | Python | 152 lines
  134: """Return a html partial of divs with cell contents."""
  135: doc = bs4.BeautifulSoup(html, 'html5lib')
  136:

extraer_datos_composicion_alimentos.py https://gitlab.com/FoodUpProject/FoodUp | Python | 54 lines
  2: import urllib2,unicodedata
  3: from bs4 import BeautifulSoup
  4:
  7: html = conexion.read()
  8: soup = BeautifulSoup(html)
  9: # we get a list of Strings by filtering on class attributes with the values details and prices

sponsoredlinks.py https://gitlab.com/oytunistrator/PwnBerryPi | Python | 235 lines
  14: from htmlentitydefs import name2codepoint
  15: from BeautifulSoup import BeautifulSoup
  16:
  30: self.msg attribute contains explanation why parsing failed
  31: self.tag attribute contains BeautifulSoup object with the most relevant tag that failed to parse
  32: Thrown only in debug mode
  158:
  159: return BeautifulSoup(page)
  160:

default.py http://seppius-xbmc-repo.googlecode.com/svn/trunk/ | Python | 341 lines
  46: sys.path.append(os.path.join(Addon.getAddonInfo('path'), r'resources', r'lib'))
  47: from BeautifulSoup import BeautifulSoup
  48: except:
  50: sys.path.insert(0, os.path.join(Addon.getAddonInfo('path'), r'resources', r'lib'))
  51: from BeautifulSoup import BeautifulSoup
  52: except:
  53: sys.path.append(os.path.join(os.getcwd(), r'resources', r'lib'))
  54: from BeautifulSoup import BeautifulSoup
  55: icon = xbmc.translatePath(os.path.join(os.getcwd().replace(';', ''),'icon.png'))
  136: html = re.compile('<body>(.+?)<\/body>', re.MULTILINE|re.DOTALL).findall(html)[0]
  137: soup = BeautifulSoup(html)
  138:
  162: html = re.compile('<body>(.+?)<\/body>', re.MULTILINE|re.DOTALL).findall(html)[0]
  163: soup = BeautifulSoup(html)
  164:

get_legislation.py https://github.com/BRIMIL01/fiftystates.git | Python | 118 lines
  4: import re
  5: from BeautifulSoup import BeautifulSoup
  6:
  45: doc = response.read()
  46: soup = BeautifulSoup(doc)
  47:

browser.py https://gitlab.com/phyks/weboob | Python | 120 lines
  22: from weboob.deprecated.browser.parsers.iparser import IParser
  23: import BeautifulSoup
  24:
  32: def parse(self, data, encoding=None):
  33: return BeautifulSoup.BeautifulSoup(data.read().decode(encoding or 'utf-8'), convertEntities=BeautifulSoup.BeautifulStoneSoup.ALL_ENTITIES)
  34:

bills.py https://github.com/runderwood/openstates.git | Python | 114 lines
  3: import re
  4: from BeautifulSoup import BeautifulSoup
  5:
  45: with self.urlopen(search_url + '?' + params) as doc:
  46: soup = BeautifulSoup(doc)
  47:

upnp.py https://gitlab.com/balhau/pyutils | Python | 152 lines
  8: import requests
  9: from bs4 import BeautifulSoup
  10:
  95:
  96: soup=BeautifulSoup(r.data)
  97:
  135:
  136: ipxml=BeautifulSoup(r.text)
  137:
  145:
  146: rhxml=BeautifulSoup(r.text)
  147:

testFunctional.py https://github.com/mihneasim/Naaya.git | Python | 208 lines
  23: from StringIO import StringIO
  24: from BeautifulSoup import BeautifulSoup
  25:
  193: html = self.browser.get_html()
  194: soup = BeautifulSoup(html)
  195:

default.py http://seppius-xbmc-repo.googlecode.com/svn/trunk/ | Python | 259 lines
  31: sys.path.append(os.path.join(Addon.getAddonInfo('path'), r'resources', r'lib'))
  32: from BeautifulSoup import BeautifulSoup
  33: except:
  35: sys.path.insert(0, os.path.join(Addon.getAddonInfo('path'), r'resources', r'lib'))
  36: from BeautifulSoup import BeautifulSoup
  37: except:
  38: sys.path.append(os.path.join(os.getcwd(), r'resources', r'lib'))
  39: from BeautifulSoup import BeautifulSoup
  40: icon = xbmc.translatePath(os.path.join(os.getcwd().replace(';', ''),'icon.png'))
  77:
  78: soup = BeautifulSoup(html, fromEncoding="windows-1251")
  79:
  119:
  120: soup = BeautifulSoup(html, fromEncoding="windows-1251")
  121:

rfc822.py https://gitlab.com/wilane/superdesk | Python | 264 lines
  23: from superdesk.errors import IngestEmailError
  24: from bs4 import BeautifulSoup, Comment, Doctype
  25: import re
  218: try:
  219: # BeautifulSoup is catching out-of-order and unclosed tags, so markup
  220: # can't leak out of comments and break the rest of the page.
  221: soup = BeautifulSoup(html)
  222: except Exception as e:

simple_dehasher.py https://gitlab.com/8wiw/python-dehasher | Python | 165 lines
  16: import time
  17: from bs4 import BeautifulSoup
  18: from colorama import Fore, Style
  130: PAGE = requests.get(URL, headers=HEADERS) # Uses requests lib to get the content of the page
  131: PAGE_CONTENT = BeautifulSoup(PAGE.content, "html.parser").get_text()
  132: #--------------------------------------------------------------------------------------------------
  138: PAGE = requests.get(URL, headers=HEADERS) # Uses requests lib to get the content of the page
  139: PAGE_CONTENT = BeautifulSoup(PAGE.content, "html.parser").get_text()
  140: if "ERROR CODE : 001" in PAGE_CONTENT:

TextRank.py https://bitbucket.org/arka7z/information-retrieval.git | Python | 271 lines
  16: from functools import reduce
  17: from bs4 import BeautifulSoup
  18: from nltk.tokenize.punkt import PunktSentenceTokenizer
  78: file_content=f.read()
  79: soup = BeautifulSoup(file_content, "lxml")
  80: text_group = soup.get_text()

scrape.py https://gitlab.com/mkhouri/news_scraper | Python | 72 lines
  1: import re
  2: from bs4 import BeautifulSoup
  3: from urllib.parse import urlparse
  7: def parse(url, pageHtml, bodyLines):
  8: soup = BeautifulSoup(pageHtml, "lxml")
  9: host = urlparse(url).hostname

4chan_downloader.py https://gitlab.com/8wiw/4chan_downloader | Python | 147 lines
  7: import requests, time, sys, os
  8: from bs4 import BeautifulSoup
  9: from colorama import Fore, Style
  91: log_user_stop(board)
  92: souped = BeautifulSoup(r.text, 'html.parser')
  93: img_tags = souped.find_all('img')

adapter_adastrafanficcom.py https://code.google.com/p/fanficdownloader/ | Python | 239 lines
  25:
  26: from .. import BeautifulSoup as bs
  27: from ..htmlcleanup import stripHTML
  95:
  96: # use BeautifulSoup HTML parser to make everything easier to find.
  97: soup = bs.BeautifulSoup(data)
  199:
  200: # use BeautifulSoup HTML parser to make everything easier to find.
  201: seriessoup = bs.BeautifulSoup(self._fetchUrl(series_url))

hulu.py https://github.com/barttenbrinke/Bartsidee-Repository.git | Python | 124 lines
  3: import ba, md5, time, base64
  4: from beautifulsoup.BeautifulSoup import BeautifulSoup
  5: from urllib import quote_plus
  24: data = data.replace('\\u003c','<').replace('\\u003e','>').replace('\\','').replace('\\n','').replace('\\t','')
  25: soup = BeautifulSoup(data, convertEntities="xml", smartQuotesTo="xml")
  26:
  42:
  43: soup = BeautifulSoup(data, convertEntities="xml", smartQuotesTo="xml")
  44: totalpage = len(soup.findAll('tr', 'srh'))
  64: data = data.replace('\\u003c','<').replace('\\u003e','>').replace('\\','')
  65: soup = BeautifulSoup(data)
  66:

utils.py https://github.com/theinterned/batucada.git | Python | 116 lines
  9:
  10: from BeautifulSoup import BeautifulSoup
  11:
  70: """
  71: soup = BeautifulSoup(content)
  72: links = soup.findAll('link')
  73:
  74: # BeautifulSoup instances are not actually dictionaries, so
  75: # we can't use the more proper 'key in dict' syntax and