--- BeautifulSoup.py
+++ BeautifulSoup.py
@@ -76,7 +76,7 @@
import sys
soup = BeautifulSoup(sys.stdin)
+ print(soup.prettify())
--- BeautifulSoupTests.py
+++ BeautifulSoupTests.py
@@ -82,7 +82,7 @@
def testFindAllText(self):
soup = BeautifulSoup("<html>\xbb</html>")
for bad_encoding in ['.utf8', '...', 'utF---16.!']:
- soup = BeautifulSoup(u"Räksmörgås".encode("utf-8"),
+ soup = BeautifulSoup("Räksmörgås".encode("utf-8"),
fromEncoding=bad_encoding)
--- BeautifulSoup.py
+++ BeautifulSoup.py
@@ -76,7 +76,7 @@
import sys
soup = BeautifulSoup(sys.stdin)
+ print(soup.prettify())
--- BeautifulSoupTests.py
+++ BeautifulSoupTests.py
@@ -82,7 +82,7 @@
def testFindAllText(self):
soup = BeautifulSoup("<html>\xbb</html>")
for bad_encoding in ['.utf8', '...', 'utF---16.!']:
- soup = BeautifulSoup(u"Räksmörgås".encode("utf-8"),
+ soup = BeautifulSoup("Räksmörgås".encode("utf-8"),
fromEncoding=bad_encoding)
"The Screen-Scraper's Friend"
http://www.crummy.com/software/BeautifulSoup/
* BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid
This class is useful for parsing XML or made-up markup languages,
or when BeautifulSoup makes an assumption counter to what you were
class ICantBelieveItsBeautifulSoup(BeautifulSoup):
"""The BeautifulSoup class is oriented towards skipping over
This is perfectly valid (if bizarre) HTML. However, the
BeautifulSoup class will implicitly close the first b tag when it
It's much more common for someone to forget to close a 'b' tag
than to actually use nested 'b' tags, and the BeautifulSoup class
case: where you can't believe someone wrote what they did, but
it's valid HTML and BeautifulSoup screwed up by assuming it
"The Screen-Scraper's Friend"
http://www.crummy.com/software/BeautifulSoup/
well-formed, you can use this library to find and process the
well-formed part of it. The BeautifulSoup class
* BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid
documentation:
http://www.crummy.com/software/BeautifulSoup/documentation.html
sgmllib will process most bad HTML, and the BeautifulSoup
class BeautifulSoup(BeautifulStoneSoup):
This is perfectly valid (if bizarre) HTML. However, the
BeautifulSoup class will implicitly close the first b tag when it
NESTABLE_TAGS = buildTagMap([], BeautifulSoup.NESTABLE_TAGS,
"The Screen-Scraper's Friend"
http://www.crummy.com/software/BeautifulSoup/
well-formed, you can use this library to find and process the
well-formed part of it. The BeautifulSoup class
* BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid
documentation:
http://www.crummy.com/software/BeautifulSoup/documentation.html
sgmllib will process most bad HTML, and the BeautifulSoup
class BeautifulSoup(BeautifulStoneSoup):
This is perfectly valid (if bizarre) HTML. However, the
BeautifulSoup class will implicitly close the first b tag when it
NESTABLE_TAGS = buildTagMap([], BeautifulSoup.NESTABLE_TAGS,
"The Screen-Scraper's Friend"
http://www.crummy.com/software/BeautifulSoup/
* BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid
documentation:
http://www.crummy.com/software/BeautifulSoup/documentation.html
HTMLParser will process most bad HTML, and the BeautifulSoup
class BeautifulSoup(BeautifulStoneSoup):
class ICantBelieveItsBeautifulSoup(BeautifulSoup):
"""The BeautifulSoup class is oriented towards skipping over
This is perfectly valid (if bizarre) HTML. However, the
BeautifulSoup class will implicitly close the first b tag when it
"The Screen-Scraper's Friend"
http://www.crummy.com/software/BeautifulSoup/
* BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid
documentation:
http://www.crummy.com/software/BeautifulSoup/documentation.html
HTMLParser will process most bad HTML, and the BeautifulSoup
class BeautifulSoup(BeautifulStoneSoup):
class ICantBelieveItsBeautifulSoup(BeautifulSoup):
"""The BeautifulSoup class is oriented towards skipping over
This is perfectly valid (if bizarre) HTML. However, the
BeautifulSoup class will implicitly close the first b tag when it
"The Screen-Scraper's Friend"
http://www.crummy.com/software/BeautifulSoup/
* BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid
documentation:
http://www.crummy.com/software/BeautifulSoup/documentation.html
HTMLParser will process most bad HTML, and the BeautifulSoup
class BeautifulSoup(BeautifulStoneSoup):
class ICantBelieveItsBeautifulSoup(BeautifulSoup):
"""The BeautifulSoup class is oriented towards skipping over
This is perfectly valid (if bizarre) HTML. However, the
BeautifulSoup class will implicitly close the first b tag when it
"The Screen-Scraper's Friend"
http://www.crummy.com/software/BeautifulSoup/
* BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid
documentation:
http://www.crummy.com/software/BeautifulSoup/documentation.html
sgmllib will process most bad HTML, and the BeautifulSoup
class BeautifulSoup(BeautifulStoneSoup):
class ICantBelieveItsBeautifulSoup(BeautifulSoup):
"""The BeautifulSoup class is oriented towards skipping over
This is perfectly valid (if bizarre) HTML. However, the
BeautifulSoup class will implicitly close the first b tag when it
"The Screen-Scraper's Friend"
http://www.crummy.com/software/BeautifulSoup/
* BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid
documentation:
http://www.crummy.com/software/BeautifulSoup/documentation.html
This class is useful for parsing XML or made-up markup languages,
or when BeautifulSoup makes an assumption counter to what you were
sgmllib will process most bad HTML, and the BeautifulSoup
class BeautifulSoup(BeautifulStoneSoup):
class ICantBelieveItsBeautifulSoup(BeautifulSoup):
"The Screen-Scraper's Friend"
http://www.crummy.com/software/BeautifulSoup/
* BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid
treating as nestable a tag your page author treats as nestable,
try ICantBelieveItsBeautifulSoup, MinimalSoup, or
class ICantBelieveItsBeautifulSoup(BeautifulSoup):
"""The BeautifulSoup class is oriented towards skipping over
This is perfectly valid (if bizarre) HTML. However, the
BeautifulSoup class will implicitly close the first b tag when it
It's much more common for someone to forget to close a 'b' tag
than to actually use nested 'b' tags, and the BeautifulSoup class
case: where you can't believe someone wrote what they did, but
it's valid HTML and BeautifulSoup screwed up by assuming it
"The Screen-Scraper's Friend"
http://www.crummy.com/software/BeautifulSoup/
* BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid
documentation:
http://www.crummy.com/software/BeautifulSoup/documentation.html
HTMLParser will process most bad HTML, and the BeautifulSoup
class BeautifulSoup(BeautifulStoneSoup):
class ICantBelieveItsBeautifulSoup(BeautifulSoup):
"""The BeautifulSoup class is oriented towards skipping over
This is perfectly valid (if bizarre) HTML. However, the
BeautifulSoup class will implicitly close the first b tag when it
"The Screen-Scraper's Friend"
http://www.crummy.com/software/BeautifulSoup/
* BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid
documentation:
http://www.crummy.com/software/BeautifulSoup/documentation.html
This class is useful for parsing XML or made-up markup languages,
or when BeautifulSoup makes an assumption counter to what you were
sgmllib will process most bad HTML, and the BeautifulSoup
class BeautifulSoup(BeautifulStoneSoup):
class ICantBelieveItsBeautifulSoup(BeautifulSoup):
"The Screen-Scraper's Friend"
http://www.crummy.com/software/BeautifulSoup/
* BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid
documentation:
http://www.crummy.com/software/BeautifulSoup/documentation.html
This class is useful for parsing XML or made-up markup languages,
or when BeautifulSoup makes an assumption counter to what you were
sgmllib will process most bad HTML, and the BeautifulSoup
class BeautifulSoup(BeautifulStoneSoup):
class ICantBelieveItsBeautifulSoup(BeautifulSoup):
"The Screen-Scraper's Friend"
http://www.crummy.com/software/BeautifulSoup/
* BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid
treating as nestable a tag your page author treats as nestable,
try ICantBelieveItsBeautifulSoup, MinimalSoup, or
class RobustHTMLParser(BeautifulSoup):
class ICantBelieveItsBeautifulSoup(BeautifulSoup):
"""
The BeautifulSoup class is oriented towards skipping over
This is perfectly valid (if bizarre) HTML. However, the
BeautifulSoup class will implicitly close the first b tag when it
It's much more common for someone to forget to close a 'b' tag
than to actually use nested 'b' tags, and the BeautifulSoup class
"The Screen-Scraper's Friend"
http://www.crummy.com/software/BeautifulSoup/
* BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid
documentation:
http://www.crummy.com/software/BeautifulSoup/documentation.html
This class is useful for parsing XML or made-up markup languages,
or when BeautifulSoup makes an assumption counter to what you were
class BeautifulSoup(BeautifulStoneSoup):
class ICantBelieveItsBeautifulSoup(BeautifulSoup):
"""The BeautifulSoup class is oriented towards skipping over
This is perfectly valid (if bizarre) HTML. However, the
BeautifulSoup class will implicitly close the first b tag when it
"The Screen-Scraper's Friend"
http://www.crummy.com/software/BeautifulSoup/
* BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid
documentation:
http://www.crummy.com/software/BeautifulSoup/documentation.html
# This works around the bug referenced in
# BeautifulSoup.py.3.diff that comes with BeautifulSoup by
HTMLParser will process most bad HTML, and the BeautifulSoup
class BeautifulSoup(BeautifulStoneSoup):
class ICantBelieveItsBeautifulSoup(BeautifulSoup):
"The Screen-Scraper's Friend"
http://www.crummy.com/software/BeautifulSoup/
* BeautifulSoup, for parsing run-of-the-mill HTML code, be it valid
documentation:
http://www.crummy.com/software/BeautifulSoup/documentation.html
# This works around the bug referenced in
# BeautifulSoup.py.3.diff that comes with BeautifulSoup by
HTMLParser will process most bad HTML, and the BeautifulSoup
class BeautifulSoup(BeautifulStoneSoup):
class ICantBelieveItsBeautifulSoup(BeautifulSoup):