PageRenderTime 65ms CodeModel.GetById 30ms RepoModel.GetById 0ms app.codeStats 1ms

/dev-python/beautifulsoup/files/beautifulsoup-3.1.0.1-python-3.patch

https://bitbucket.org/lmnd/gx86-cropped
Patch | 949 lines | 852 code | 97 blank | 0 comment | 0 complexity | b0ffeb7348e51767ac952290916a24f9 MD5 | raw file
Possible License(s): LGPL-2.1, LGPL-2.0, GPL-3.0, GPL-2.0, LGPL-3.0, MIT, AGPL-3.0, CC-BY-SA-4.0, Apache-2.0, AGPL-1.0, Unlicense, BitTorrent-1.0, BSD-3-Clause, MPL-2.0-no-copyleft-exception, CC-BY-SA-3.0
  1. --- BeautifulSoup.py
  2. +++ BeautifulSoup.py
  3. @@ -76,7 +76,7 @@
  4. SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE, DAMMIT.
  5. """
  6. -from __future__ import generators
  7. +
  8. __author__ = "Leonard Richardson (leonardr@segfault.org)"
  9. __version__ = "3.1.0.1"
  10. @@ -84,12 +84,12 @@
  11. __license__ = "New-style BSD"
  12. import codecs
  13. -import markupbase
  14. +import _markupbase
  15. import types
  16. import re
  17. -from HTMLParser import HTMLParser, HTMLParseError
  18. +from html.parser import HTMLParser, HTMLParseError
  19. try:
  20. - from htmlentitydefs import name2codepoint
  21. + from html.entities import name2codepoint
  22. except ImportError:
  23. name2codepoint = {}
  24. try:
  25. @@ -98,18 +98,18 @@
  26. from sets import Set as set
  27. #These hacks make Beautiful Soup able to parse XML with namespaces
  28. -markupbase._declname_match = re.compile(r'[a-zA-Z][-_.:a-zA-Z0-9]*\s*').match
  29. +_markupbase._declname_match = re.compile(r'[a-zA-Z][-_.:a-zA-Z0-9]*\s*').match
  30. DEFAULT_OUTPUT_ENCODING = "utf-8"
  31. # First, the classes that represent markup elements.
  32. -def sob(unicode, encoding):
  33. +def sob(str, encoding):
  34. """Returns either the given Unicode string or its encoding."""
  35. if encoding is None:
  36. - return unicode
  37. + return str
  38. else:
  39. - return unicode.encode(encoding)
  40. + return str.encode(encoding)
  41. class PageElement:
  42. """Contains the navigational information for some part of the page
  43. @@ -178,8 +178,8 @@
  44. return lastChild
  45. def insert(self, position, newChild):
  46. - if (isinstance(newChild, basestring)
  47. - or isinstance(newChild, unicode)) \
  48. + if (isinstance(newChild, str)
  49. + or isinstance(newChild, str)) \
  50. and not isinstance(newChild, NavigableString):
  51. newChild = NavigableString(newChild)
  52. @@ -334,7 +334,7 @@
  53. g = generator()
  54. while True:
  55. try:
  56. - i = g.next()
  57. + i = g.__next__()
  58. except StopIteration:
  59. break
  60. if i:
  61. @@ -385,22 +385,22 @@
  62. def toEncoding(self, s, encoding=None):
  63. """Encodes an object to a string in some encoding, or to Unicode.
  64. ."""
  65. - if isinstance(s, unicode):
  66. + if isinstance(s, str):
  67. if encoding:
  68. s = s.encode(encoding)
  69. elif isinstance(s, str):
  70. if encoding:
  71. s = s.encode(encoding)
  72. else:
  73. - s = unicode(s)
  74. + s = str(s)
  75. else:
  76. if encoding:
  77. s = self.toEncoding(str(s), encoding)
  78. else:
  79. - s = unicode(s)
  80. + s = str(s)
  81. return s
  82. -class NavigableString(unicode, PageElement):
  83. +class NavigableString(str, PageElement):
  84. def __new__(cls, value):
  85. """Create a new NavigableString.
  86. @@ -410,12 +410,12 @@
  87. passed in to the superclass's __new__ or the superclass won't know
  88. how to handle non-ASCII characters.
  89. """
  90. - if isinstance(value, unicode):
  91. - return unicode.__new__(cls, value)
  92. - return unicode.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)
  93. + if isinstance(value, str):
  94. + return str.__new__(cls, value)
  95. + return str.__new__(cls, value, DEFAULT_OUTPUT_ENCODING)
  96. def __getnewargs__(self):
  97. - return (unicode(self),)
  98. + return (str(self),)
  99. def __getattr__(self, attr):
  100. """text.string gives you text. This is for backwards
  101. @@ -424,7 +424,7 @@
  102. if attr == 'string':
  103. return self
  104. else:
  105. - raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__.__name__, attr)
  106. + raise AttributeError("'%s' object has no attribute '%s'" % (self.__class__.__name__, attr))
  107. def encode(self, encoding=DEFAULT_OUTPUT_ENCODING):
  108. return self.decode().encode(encoding)
  109. @@ -435,23 +435,23 @@
  110. class CData(NavigableString):
  111. def decodeGivenEventualEncoding(self, eventualEncoding):
  112. - return u'<![CDATA[' + self + u']]>'
  113. + return '<![CDATA[' + self + ']]>'
  114. class ProcessingInstruction(NavigableString):
  115. def decodeGivenEventualEncoding(self, eventualEncoding):
  116. output = self
  117. - if u'%SOUP-ENCODING%' in output:
  118. + if '%SOUP-ENCODING%' in output:
  119. output = self.substituteEncoding(output, eventualEncoding)
  120. - return u'<?' + output + u'?>'
  121. + return '<?' + output + '?>'
  122. class Comment(NavigableString):
  123. def decodeGivenEventualEncoding(self, eventualEncoding):
  124. - return u'<!--' + self + u'-->'
  125. + return '<!--' + self + '-->'
  126. class Declaration(NavigableString):
  127. def decodeGivenEventualEncoding(self, eventualEncoding):
  128. - return u'<!' + self + u'>'
  129. + return '<!' + self + '>'
  130. class Tag(PageElement):
  131. @@ -460,7 +460,7 @@
  132. def _invert(h):
  133. "Cheap function to invert a hash."
  134. i = {}
  135. - for k,v in h.items():
  136. + for k,v in list(h.items()):
  137. i[v] = k
  138. return i
  139. @@ -479,23 +479,23 @@
  140. escaped."""
  141. x = match.group(1)
  142. if self.convertHTMLEntities and x in name2codepoint:
  143. - return unichr(name2codepoint[x])
  144. + return chr(name2codepoint[x])
  145. elif x in self.XML_ENTITIES_TO_SPECIAL_CHARS:
  146. if self.convertXMLEntities:
  147. return self.XML_ENTITIES_TO_SPECIAL_CHARS[x]
  148. else:
  149. - return u'&%s;' % x
  150. + return '&%s;' % x
  151. elif len(x) > 0 and x[0] == '#':
  152. # Handle numeric entities
  153. if len(x) > 1 and x[1] == 'x':
  154. - return unichr(int(x[2:], 16))
  155. + return chr(int(x[2:], 16))
  156. else:
  157. - return unichr(int(x[1:]))
  158. + return chr(int(x[1:]))
  159. elif self.escapeUnrecognizedEntities:
  160. - return u'&amp;%s;' % x
  161. + return '&amp;%s;' % x
  162. else:
  163. - return u'&%s;' % x
  164. + return '&%s;' % x
  165. def __init__(self, parser, name, attrs=None, parent=None,
  166. previous=None):
  167. @@ -524,7 +524,7 @@
  168. return kval
  169. return (k, re.sub("&(#\d+|#x[0-9a-fA-F]+|\w+);",
  170. self._convertEntities, val))
  171. - self.attrs = map(convert, self.attrs)
  172. + self.attrs = list(map(convert, self.attrs))
  173. def get(self, key, default=None):
  174. """Returns the value of the 'key' attribute for the tag, or
  175. @@ -533,7 +533,7 @@
  176. return self._getAttrMap().get(key, default)
  177. def has_key(self, key):
  178. - return self._getAttrMap().has_key(key)
  179. + return key in self._getAttrMap()
  180. def __getitem__(self, key):
  181. """tag[key] returns the value of the 'key' attribute for the tag,
  182. @@ -551,7 +551,7 @@
  183. def __contains__(self, x):
  184. return x in self.contents
  185. - def __nonzero__(self):
  186. + def __bool__(self):
  187. "A tag is non-None even if it has no contents."
  188. return True
  189. @@ -577,14 +577,14 @@
  190. #We don't break because bad HTML can define the same
  191. #attribute multiple times.
  192. self._getAttrMap()
  193. - if self.attrMap.has_key(key):
  194. + if key in self.attrMap:
  195. del self.attrMap[key]
  196. def __call__(self, *args, **kwargs):
  197. """Calling a tag like a function is the same as calling its
  198. findAll() method. Eg. tag('a') returns a list of all the A tags
  199. found within this tag."""
  200. - return apply(self.findAll, args, kwargs)
  201. + return self.findAll(*args, **kwargs)
  202. def __getattr__(self, tag):
  203. #print "Getattr %s.%s" % (self.__class__, tag)
  204. @@ -592,7 +592,7 @@
  205. return self.find(tag[:-3])
  206. elif tag.find('__') != 0:
  207. return self.find(tag)
  208. - raise AttributeError, "'%s' object has no attribute '%s'" % (self.__class__, tag)
  209. + raise AttributeError("'%s' object has no attribute '%s'" % (self.__class__, tag))
  210. def __eq__(self, other):
  211. """Returns true iff this tag has the same name, the same attributes,
  212. @@ -868,7 +868,7 @@
  213. if isinstance(markupName, Tag):
  214. markup = markupName
  215. markupAttrs = markup
  216. - callFunctionWithTagData = callable(self.name) \
  217. + callFunctionWithTagData = hasattr(self.name, '__call__') \
  218. and not isinstance(markupName, Tag)
  219. if (not self.name) \
  220. @@ -880,7 +880,7 @@
  221. else:
  222. match = True
  223. markupAttrMap = None
  224. - for attr, matchAgainst in self.attrs.items():
  225. + for attr, matchAgainst in list(self.attrs.items()):
  226. if not markupAttrMap:
  227. if hasattr(markupAttrs, 'get'):
  228. markupAttrMap = markupAttrs
  229. @@ -921,16 +921,16 @@
  230. if self._matches(markup, self.text):
  231. found = markup
  232. else:
  233. - raise Exception, "I don't know how to match against a %s" \
  234. - % markup.__class__
  235. + raise Exception("I don't know how to match against a %s" \
  236. + % markup.__class__)
  237. return found
  238. def _matches(self, markup, matchAgainst):
  239. #print "Matching %s against %s" % (markup, matchAgainst)
  240. result = False
  241. - if matchAgainst == True and type(matchAgainst) == types.BooleanType:
  242. + if matchAgainst == True and type(matchAgainst) == bool:
  243. result = markup != None
  244. - elif callable(matchAgainst):
  245. + elif hasattr(matchAgainst, '__call__'):
  246. result = matchAgainst(markup)
  247. else:
  248. #Custom match methods take the tag as an argument, but all
  249. @@ -938,7 +938,7 @@
  250. if isinstance(markup, Tag):
  251. markup = markup.name
  252. if markup is not None and not isString(markup):
  253. - markup = unicode(markup)
  254. + markup = str(markup)
  255. #Now we know that chunk is either a string, or None.
  256. if hasattr(matchAgainst, 'match'):
  257. # It's a regexp object.
  258. @@ -947,10 +947,10 @@
  259. and (markup is not None or not isString(matchAgainst))):
  260. result = markup in matchAgainst
  261. elif hasattr(matchAgainst, 'items'):
  262. - result = markup.has_key(matchAgainst)
  263. + result = matchAgainst in markup
  264. elif matchAgainst and isString(markup):
  265. - if isinstance(markup, unicode):
  266. - matchAgainst = unicode(matchAgainst)
  267. + if isinstance(markup, str):
  268. + matchAgainst = str(matchAgainst)
  269. else:
  270. matchAgainst = str(matchAgainst)
  271. @@ -971,13 +971,13 @@
  272. """Convenience method that works with all 2.x versions of Python
  273. to determine whether or not something is listlike."""
  274. return ((hasattr(l, '__iter__') and not isString(l))
  275. - or (type(l) in (types.ListType, types.TupleType)))
  276. + or (type(l) in (list, tuple)))
  277. def isString(s):
  278. """Convenience method that works with all 2.x versions of Python
  279. to determine whether or not something is stringlike."""
  280. try:
  281. - return isinstance(s, unicode) or isinstance(s, basestring)
  282. + return isinstance(s, str) or isinstance(s, str)
  283. except NameError:
  284. return isinstance(s, str)
  285. @@ -989,7 +989,7 @@
  286. for portion in args:
  287. if hasattr(portion, 'items'):
  288. #It's a map. Merge it.
  289. - for k,v in portion.items():
  290. + for k,v in list(portion.items()):
  291. built[k] = v
  292. elif isList(portion) and not isString(portion):
  293. #It's a list. Map each item to the default.
  294. @@ -1034,7 +1034,7 @@
  295. object, possibly one with a %SOUP-ENCODING% slot into which an
  296. encoding will be plugged later."""
  297. if text[:3] == "xml":
  298. - text = u"xml version='1.0' encoding='%SOUP-ENCODING%'"
  299. + text = "xml version='1.0' encoding='%SOUP-ENCODING%'"
  300. self._toStringSubclass(text, ProcessingInstruction)
  301. def handle_comment(self, text):
  302. @@ -1044,7 +1044,7 @@
  303. def handle_charref(self, ref):
  304. "Handle character references as data."
  305. if self.soup.convertEntities:
  306. - data = unichr(int(ref))
  307. + data = chr(int(ref))
  308. else:
  309. data = '&#%s;' % ref
  310. self.handle_data(data)
  311. @@ -1056,7 +1056,7 @@
  312. data = None
  313. if self.soup.convertHTMLEntities:
  314. try:
  315. - data = unichr(name2codepoint[ref])
  316. + data = chr(name2codepoint[ref])
  317. except KeyError:
  318. pass
  319. @@ -1147,7 +1147,7 @@
  320. lambda x: '<!' + x.group(1) + '>')
  321. ]
  322. - ROOT_TAG_NAME = u'[document]'
  323. + ROOT_TAG_NAME = '[document]'
  324. HTML_ENTITIES = "html"
  325. XML_ENTITIES = "xml"
  326. @@ -1236,14 +1236,14 @@
  327. def _feed(self, inDocumentEncoding=None, isHTML=False):
  328. # Convert the document to Unicode.
  329. markup = self.markup
  330. - if isinstance(markup, unicode):
  331. + if isinstance(markup, str):
  332. if not hasattr(self, 'originalEncoding'):
  333. self.originalEncoding = None
  334. else:
  335. dammit = UnicodeDammit\
  336. (markup, [self.fromEncoding, inDocumentEncoding],
  337. smartQuotesTo=self.smartQuotesTo, isHTML=isHTML)
  338. - markup = dammit.unicode
  339. + markup = dammit.str
  340. self.originalEncoding = dammit.originalEncoding
  341. self.declaredHTMLEncoding = dammit.declaredHTMLEncoding
  342. if markup:
  343. @@ -1269,8 +1269,8 @@
  344. def isSelfClosingTag(self, name):
  345. """Returns true iff the given string is the name of a
  346. self-closing tag according to this parser."""
  347. - return self.SELF_CLOSING_TAGS.has_key(name) \
  348. - or self.instanceSelfClosingTags.has_key(name)
  349. + return name in self.SELF_CLOSING_TAGS \
  350. + or name in self.instanceSelfClosingTags
  351. def reset(self):
  352. Tag.__init__(self, self, self.ROOT_TAG_NAME)
  353. @@ -1305,7 +1305,7 @@
  354. def endData(self, containerClass=NavigableString):
  355. if self.currentData:
  356. - currentData = u''.join(self.currentData)
  357. + currentData = ''.join(self.currentData)
  358. if (currentData.translate(self.STRIP_ASCII_SPACES) == '' and
  359. not set([tag.name for tag in self.tagStack]).intersection(
  360. self.PRESERVE_WHITESPACE_TAGS)):
  361. @@ -1368,7 +1368,7 @@
  362. nestingResetTriggers = self.NESTABLE_TAGS.get(name)
  363. isNestable = nestingResetTriggers != None
  364. - isResetNesting = self.RESET_NESTING_TAGS.has_key(name)
  365. + isResetNesting = name in self.RESET_NESTING_TAGS
  366. popTo = None
  367. inclusive = True
  368. for i in range(len(self.tagStack)-1, 0, -1):
  369. @@ -1381,7 +1381,7 @@
  370. if (nestingResetTriggers != None
  371. and p.name in nestingResetTriggers) \
  372. or (nestingResetTriggers == None and isResetNesting
  373. - and self.RESET_NESTING_TAGS.has_key(p.name)):
  374. + and p.name in self.RESET_NESTING_TAGS):
  375. #If we encounter one of the nesting reset triggers
  376. #peculiar to this tag, or we encounter another tag
  377. @@ -1399,7 +1399,7 @@
  378. if self.quoteStack:
  379. #This is not a real tag.
  380. #print "<%s> is not real!" % name
  381. - attrs = ''.join(map(lambda(x, y): ' %s="%s"' % (x, y), attrs))
  382. + attrs = ''.join([' %s="%s"' % (x_y[0], x_y[1]) for x_y in attrs])
  383. self.handle_data('<%s%s>' % (name, attrs))
  384. return
  385. self.endData()
  386. @@ -1493,7 +1493,7 @@
  387. BeautifulStoneSoup before writing your own subclass."""
  388. def __init__(self, *args, **kwargs):
  389. - if not kwargs.has_key('smartQuotesTo'):
  390. + if 'smartQuotesTo' not in kwargs:
  391. kwargs['smartQuotesTo'] = self.HTML_ENTITIES
  392. kwargs['isHTML'] = True
  393. BeautifulStoneSoup.__init__(self, *args, **kwargs)
  394. @@ -1677,7 +1677,7 @@
  395. parent._getAttrMap()
  396. if (isinstance(tag, Tag) and len(tag.contents) == 1 and
  397. isinstance(tag.contents[0], NavigableString) and
  398. - not parent.attrMap.has_key(tag.name)):
  399. + tag.name not in parent.attrMap):
  400. parent[tag.name] = tag.contents[0]
  401. BeautifulStoneSoup.popTag(self)
  402. @@ -1751,9 +1751,9 @@
  403. self._detectEncoding(markup, isHTML)
  404. self.smartQuotesTo = smartQuotesTo
  405. self.triedEncodings = []
  406. - if markup == '' or isinstance(markup, unicode):
  407. + if markup == '' or isinstance(markup, str):
  408. self.originalEncoding = None
  409. - self.unicode = unicode(markup)
  410. + self.str = str(markup)
  411. return
  412. u = None
  413. @@ -1766,7 +1766,7 @@
  414. if u: break
  415. # If no luck and we have auto-detection library, try that:
  416. - if not u and chardet and not isinstance(self.markup, unicode):
  417. + if not u and chardet and not isinstance(self.markup, str):
  418. u = self._convertFrom(chardet.detect(self.markup)['encoding'])
  419. # As a last resort, try utf-8 and windows-1252:
  420. @@ -1775,7 +1775,7 @@
  421. u = self._convertFrom(proposed_encoding)
  422. if u: break
  423. - self.unicode = u
  424. + self.str = u
  425. if not u: self.originalEncoding = None
  426. def _subMSChar(self, match):
  427. @@ -1783,7 +1783,7 @@
  428. entity."""
  429. orig = match.group(1)
  430. sub = self.MS_CHARS.get(orig)
  431. - if type(sub) == types.TupleType:
  432. + if type(sub) == tuple:
  433. if self.smartQuotesTo == 'xml':
  434. sub = '&#x'.encode() + sub[1].encode() + ';'.encode()
  435. else:
  436. @@ -1804,7 +1804,7 @@
  437. if self.smartQuotesTo and proposed.lower() in("windows-1252",
  438. "iso-8859-1",
  439. "iso-8859-2"):
  440. - smart_quotes_re = "([\x80-\x9f])"
  441. + smart_quotes_re = b"([\x80-\x9f])"
  442. smart_quotes_compiled = re.compile(smart_quotes_re)
  443. markup = smart_quotes_compiled.sub(self._subMSChar, markup)
  444. @@ -1813,7 +1813,7 @@
  445. u = self._toUnicode(markup, proposed)
  446. self.markup = u
  447. self.originalEncoding = proposed
  448. - except Exception, e:
  449. + except Exception as e:
  450. # print "That didn't work!"
  451. # print e
  452. return None
  453. @@ -1842,7 +1842,7 @@
  454. elif data[:4] == '\xff\xfe\x00\x00':
  455. encoding = 'utf-32le'
  456. data = data[4:]
  457. - newdata = unicode(data, encoding)
  458. + newdata = str(data, encoding)
  459. return newdata
  460. def _detectEncoding(self, xml_data, isHTML=False):
  461. @@ -1855,41 +1855,41 @@
  462. elif xml_data[:4] == '\x00\x3c\x00\x3f':
  463. # UTF-16BE
  464. sniffed_xml_encoding = 'utf-16be'
  465. - xml_data = unicode(xml_data, 'utf-16be').encode('utf-8')
  466. + xml_data = str(xml_data, 'utf-16be').encode('utf-8')
  467. elif (len(xml_data) >= 4) and (xml_data[:2] == '\xfe\xff') \
  468. and (xml_data[2:4] != '\x00\x00'):
  469. # UTF-16BE with BOM
  470. sniffed_xml_encoding = 'utf-16be'
  471. - xml_data = unicode(xml_data[2:], 'utf-16be').encode('utf-8')
  472. + xml_data = str(xml_data[2:], 'utf-16be').encode('utf-8')
  473. elif xml_data[:4] == '\x3c\x00\x3f\x00':
  474. # UTF-16LE
  475. sniffed_xml_encoding = 'utf-16le'
  476. - xml_data = unicode(xml_data, 'utf-16le').encode('utf-8')
  477. + xml_data = str(xml_data, 'utf-16le').encode('utf-8')
  478. elif (len(xml_data) >= 4) and (xml_data[:2] == '\xff\xfe') and \
  479. (xml_data[2:4] != '\x00\x00'):
  480. # UTF-16LE with BOM
  481. sniffed_xml_encoding = 'utf-16le'
  482. - xml_data = unicode(xml_data[2:], 'utf-16le').encode('utf-8')
  483. + xml_data = str(xml_data[2:], 'utf-16le').encode('utf-8')
  484. elif xml_data[:4] == '\x00\x00\x00\x3c':
  485. # UTF-32BE
  486. sniffed_xml_encoding = 'utf-32be'
  487. - xml_data = unicode(xml_data, 'utf-32be').encode('utf-8')
  488. + xml_data = str(xml_data, 'utf-32be').encode('utf-8')
  489. elif xml_data[:4] == '\x3c\x00\x00\x00':
  490. # UTF-32LE
  491. sniffed_xml_encoding = 'utf-32le'
  492. - xml_data = unicode(xml_data, 'utf-32le').encode('utf-8')
  493. + xml_data = str(xml_data, 'utf-32le').encode('utf-8')
  494. elif xml_data[:4] == '\x00\x00\xfe\xff':
  495. # UTF-32BE with BOM
  496. sniffed_xml_encoding = 'utf-32be'
  497. - xml_data = unicode(xml_data[4:], 'utf-32be').encode('utf-8')
  498. + xml_data = str(xml_data[4:], 'utf-32be').encode('utf-8')
  499. elif xml_data[:4] == '\xff\xfe\x00\x00':
  500. # UTF-32LE with BOM
  501. sniffed_xml_encoding = 'utf-32le'
  502. - xml_data = unicode(xml_data[4:], 'utf-32le').encode('utf-8')
  503. + xml_data = str(xml_data[4:], 'utf-32le').encode('utf-8')
  504. elif xml_data[:3] == '\xef\xbb\xbf':
  505. # UTF-8 with BOM
  506. sniffed_xml_encoding = 'utf-8'
  507. - xml_data = unicode(xml_data[3:], 'utf-8').encode('utf-8')
  508. + xml_data = str(xml_data[3:], 'utf-8').encode('utf-8')
  509. else:
  510. sniffed_xml_encoding = 'ascii'
  511. pass
  512. @@ -1954,41 +1954,41 @@
  513. 250,251,252,253,254,255)
  514. import string
  515. c.EBCDIC_TO_ASCII_MAP = string.maketrans( \
  516. - ''.join(map(chr, range(256))), ''.join(map(chr, emap)))
  517. + ''.join(map(chr, list(range(256)))), ''.join(map(chr, emap)))
  518. return s.translate(c.EBCDIC_TO_ASCII_MAP)
  519. - MS_CHARS = { '\x80' : ('euro', '20AC'),
  520. - '\x81' : ' ',
  521. - '\x82' : ('sbquo', '201A'),
  522. - '\x83' : ('fnof', '192'),
  523. - '\x84' : ('bdquo', '201E'),
  524. - '\x85' : ('hellip', '2026'),
  525. - '\x86' : ('dagger', '2020'),
  526. - '\x87' : ('Dagger', '2021'),
  527. - '\x88' : ('circ', '2C6'),
  528. - '\x89' : ('permil', '2030'),
  529. - '\x8A' : ('Scaron', '160'),
  530. - '\x8B' : ('lsaquo', '2039'),
  531. - '\x8C' : ('OElig', '152'),
  532. - '\x8D' : '?',
  533. - '\x8E' : ('#x17D', '17D'),
  534. - '\x8F' : '?',
  535. - '\x90' : '?',
  536. - '\x91' : ('lsquo', '2018'),
  537. - '\x92' : ('rsquo', '2019'),
  538. - '\x93' : ('ldquo', '201C'),
  539. - '\x94' : ('rdquo', '201D'),
  540. - '\x95' : ('bull', '2022'),
  541. - '\x96' : ('ndash', '2013'),
  542. - '\x97' : ('mdash', '2014'),
  543. - '\x98' : ('tilde', '2DC'),
  544. - '\x99' : ('trade', '2122'),
  545. - '\x9a' : ('scaron', '161'),
  546. - '\x9b' : ('rsaquo', '203A'),
  547. - '\x9c' : ('oelig', '153'),
  548. - '\x9d' : '?',
  549. - '\x9e' : ('#x17E', '17E'),
  550. - '\x9f' : ('Yuml', ''),}
  551. + MS_CHARS = { b'\x80' : ('euro', '20AC'),
  552. + b'\x81' : ' ',
  553. + b'\x82' : ('sbquo', '201A'),
  554. + b'\x83' : ('fnof', '192'),
  555. + b'\x84' : ('bdquo', '201E'),
  556. + b'\x85' : ('hellip', '2026'),
  557. + b'\x86' : ('dagger', '2020'),
  558. + b'\x87' : ('Dagger', '2021'),
  559. + b'\x88' : ('circ', '2C6'),
  560. + b'\x89' : ('permil', '2030'),
  561. + b'\x8A' : ('Scaron', '160'),
  562. + b'\x8B' : ('lsaquo', '2039'),
  563. + b'\x8C' : ('OElig', '152'),
  564. + b'\x8D' : '?',
  565. + b'\x8E' : ('#x17D', '17D'),
  566. + b'\x8F' : '?',
  567. + b'\x90' : '?',
  568. + b'\x91' : ('lsquo', '2018'),
  569. + b'\x92' : ('rsquo', '2019'),
  570. + b'\x93' : ('ldquo', '201C'),
  571. + b'\x94' : ('rdquo', '201D'),
  572. + b'\x95' : ('bull', '2022'),
  573. + b'\x96' : ('ndash', '2013'),
  574. + b'\x97' : ('mdash', '2014'),
  575. + b'\x98' : ('tilde', '2DC'),
  576. + b'\x99' : ('trade', '2122'),
  577. + b'\x9a' : ('scaron', '161'),
  578. + b'\x9b' : ('rsaquo', '203A'),
  579. + b'\x9c' : ('oelig', '153'),
  580. + b'\x9d' : '?',
  581. + b'\x9e' : ('#x17E', '17E'),
  582. + b'\x9f' : ('Yuml', ''),}
  583. #######################################################################
  584. @@ -1997,4 +1997,4 @@
  585. if __name__ == '__main__':
  586. import sys
  587. soup = BeautifulSoup(sys.stdin)
  588. - print soup.prettify()
  589. + print(soup.prettify())
  590. --- BeautifulSoupTests.py
  591. +++ BeautifulSoupTests.py
  592. @@ -82,7 +82,7 @@
  593. def testFindAllText(self):
  594. soup = BeautifulSoup("<html>\xbb</html>")
  595. self.assertEqual(soup.findAll(text=re.compile('.*')),
  596. - [u'\xbb'])
  597. + ['\xbb'])
  598. def testFindAllByRE(self):
  599. import re
  600. @@ -215,7 +215,7 @@
  601. soup = BeautifulSoup(self.x, parseOnlyThese=strainer)
  602. self.assertEquals(len(soup), 10)
  603. - strainer = SoupStrainer(text=lambda(x):x[8]=='3')
  604. + strainer = SoupStrainer(text=lambda x:x[8]=='3')
  605. soup = BeautifulSoup(self.x, parseOnlyThese=strainer)
  606. self.assertEquals(len(soup), 3)
  607. @@ -256,7 +256,7 @@
  608. self.assertEqual(copied.decode(), self.soup.decode())
  609. def testUnicodePickle(self):
  610. - import cPickle as pickle
  611. + import pickle as pickle
  612. html = "<b>" + chr(0xc3) + "</b>"
  613. soup = BeautifulSoup(html)
  614. dumped = pickle.dumps(soup, pickle.HIGHEST_PROTOCOL)
  615. @@ -586,23 +586,23 @@
  616. self.assertEquals(soup.decode(), "<<sacr&eacute; bleu!>>")
  617. soup = BeautifulStoneSoup(text, convertEntities=htmlEnt)
  618. - self.assertEquals(soup.decode(), u"<<sacr\xe9 bleu!>>")
  619. + self.assertEquals(soup.decode(), "<<sacr\xe9 bleu!>>")
  620. # Make sure the "XML", "HTML", and "XHTML" settings work.
  621. text = "&lt;&trade;&apos;"
  622. soup = BeautifulStoneSoup(text, convertEntities=xmlEnt)
  623. - self.assertEquals(soup.decode(), u"<&trade;'")
  624. + self.assertEquals(soup.decode(), "<&trade;'")
  625. soup = BeautifulStoneSoup(text, convertEntities=htmlEnt)
  626. - self.assertEquals(soup.decode(), u"<\u2122&apos;")
  627. + self.assertEquals(soup.decode(), "<\u2122&apos;")
  628. soup = BeautifulStoneSoup(text, convertEntities=xhtmlEnt)
  629. - self.assertEquals(soup.decode(), u"<\u2122'")
  630. + self.assertEquals(soup.decode(), "<\u2122'")
  631. def testNonBreakingSpaces(self):
  632. soup = BeautifulSoup("<a>&nbsp;&nbsp;</a>",
  633. convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
  634. - self.assertEquals(soup.decode(), u"<a>\xa0\xa0</a>")
  635. + self.assertEquals(soup.decode(), "<a>\xa0\xa0</a>")
  636. def testWhitespaceInDeclaration(self):
  637. self.assertSoupEquals('<! DOCTYPE>', '<!DOCTYPE>')
  638. @@ -617,27 +617,27 @@
  639. self.assertSoupEquals('<b>hello&nbsp;there</b>')
  640. def testEntitiesInAttributeValues(self):
  641. - self.assertSoupEquals('<x t="x&#241;">', '<x t="x\xc3\xb1"></x>',
  642. + self.assertSoupEquals('<x t="x&#241;">', b'<x t="x\xc3\xb1"></x>',
  643. encoding='utf-8')
  644. - self.assertSoupEquals('<x t="x&#xf1;">', '<x t="x\xc3\xb1"></x>',
  645. + self.assertSoupEquals('<x t="x&#xf1;">', b'<x t="x\xc3\xb1"></x>',
  646. encoding='utf-8')
  647. soup = BeautifulSoup('<x t="&gt;&trade;">',
  648. convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
  649. - self.assertEquals(soup.decode(), u'<x t="&gt;\u2122"></x>')
  650. + self.assertEquals(soup.decode(), '<x t="&gt;\u2122"></x>')
  651. uri = "http://crummy.com?sacr&eacute;&amp;bleu"
  652. link = '<a href="%s"></a>' % uri
  653. soup = BeautifulSoup(link, convertEntities=BeautifulSoup.HTML_ENTITIES)
  654. self.assertEquals(soup.decode(),
  655. - link.replace("&eacute;", u"\xe9"))
  656. + link.replace("&eacute;", "\xe9"))
  657. uri = "http://crummy.com?sacr&eacute;&bleu"
  658. link = '<a href="%s"></a>' % uri
  659. soup = BeautifulSoup(link, convertEntities=BeautifulSoup.HTML_ENTITIES)
  660. self.assertEquals(soup.a['href'],
  661. - uri.replace("&eacute;", u"\xe9"))
  662. + uri.replace("&eacute;", "\xe9"))
  663. def testNakedAmpersands(self):
  664. html = {'convertEntities':BeautifulStoneSoup.HTML_ENTITIES}
  665. @@ -663,13 +663,13 @@
  666. smart quote fixes."""
  667. def testUnicodeDammitStandalone(self):
  668. - markup = "<foo>\x92</foo>"
  669. + markup = b"<foo>\x92</foo>"
  670. dammit = UnicodeDammit(markup)
  671. - self.assertEquals(dammit.unicode, "<foo>&#x2019;</foo>")
  672. + self.assertEquals(dammit.str, "<foo>&#x2019;</foo>")
  673. - hebrew = "\xed\xe5\xec\xf9"
  674. + hebrew = b"\xed\xe5\xec\xf9"
  675. dammit = UnicodeDammit(hebrew, ["iso-8859-8"])
  676. - self.assertEquals(dammit.unicode, u'\u05dd\u05d5\u05dc\u05e9')
  677. + self.assertEquals(dammit.str, '\u05dd\u05d5\u05dc\u05e9')
  678. self.assertEquals(dammit.originalEncoding, 'iso-8859-8')
  679. def testGarbageInGarbageOut(self):
  680. @@ -677,13 +677,13 @@
  681. asciiSoup = BeautifulStoneSoup(ascii)
  682. self.assertEquals(ascii, asciiSoup.decode())
  683. - unicodeData = u"<foo>\u00FC</foo>"
  684. + unicodeData = "<foo>\u00FC</foo>"
  685. utf8 = unicodeData.encode("utf-8")
  686. - self.assertEquals(utf8, '<foo>\xc3\xbc</foo>')
  687. + self.assertEquals(utf8, b'<foo>\xc3\xbc</foo>')
  688. unicodeSoup = BeautifulStoneSoup(unicodeData)
  689. self.assertEquals(unicodeData, unicodeSoup.decode())
  690. - self.assertEquals(unicodeSoup.foo.string, u'\u00FC')
  691. + self.assertEquals(unicodeSoup.foo.string, '\u00FC')
  692. utf8Soup = BeautifulStoneSoup(utf8, fromEncoding='utf-8')
  693. self.assertEquals(utf8, utf8Soup.encode('utf-8'))
  694. @@ -696,18 +696,18 @@
  695. def testHandleInvalidCodec(self):
  696. for bad_encoding in ['.utf8', '...', 'utF---16.!']:
  697. - soup = BeautifulSoup(u"Räksmörgås".encode("utf-8"),
  698. + soup = BeautifulSoup("Räksmörgås".encode("utf-8"),
  699. fromEncoding=bad_encoding)
  700. self.assertEquals(soup.originalEncoding, 'utf-8')
  701. def testUnicodeSearch(self):
  702. - html = u'<html><body><h1>Räksmörgås</h1></body></html>'
  703. + html = '<html><body><h1>Räksmörgås</h1></body></html>'
  704. soup = BeautifulSoup(html)
  705. - self.assertEqual(soup.find(text=u'Räksmörgås'),u'Räksmörgås')
  706. + self.assertEqual(soup.find(text='Räksmörgås'),'Räksmörgås')
  707. def testRewrittenXMLHeader(self):
  708. - euc_jp = '<?xml version="1.0 encoding="euc-jp"?>\n<foo>\n\xa4\xb3\xa4\xec\xa4\xcfEUC-JP\xa4\xc7\xa5\xb3\xa1\xbc\xa5\xc7\xa5\xa3\xa5\xf3\xa5\xb0\xa4\xb5\xa4\xec\xa4\xbf\xc6\xfc\xcb\xdc\xb8\xec\xa4\xce\xa5\xd5\xa5\xa1\xa5\xa4\xa5\xeb\xa4\xc7\xa4\xb9\xa1\xa3\n</foo>\n'
  709. - utf8 = "<?xml version='1.0' encoding='utf-8'?>\n<foo>\n\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafEUC-JP\xe3\x81\xa7\xe3\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n</foo>\n"
  710. + euc_jp = b'<?xml version="1.0 encoding="euc-jp"?>\n<foo>\n\xa4\xb3\xa4\xec\xa4\xcfEUC-JP\xa4\xc7\xa5\xb3\xa1\xbc\xa5\xc7\xa5\xa3\xa5\xf3\xa5\xb0\xa4\xb5\xa4\xec\xa4\xbf\xc6\xfc\xcb\xdc\xb8\xec\xa4\xce\xa5\xd5\xa5\xa1\xa5\xa4\xa5\xeb\xa4\xc7\xa4\xb9\xa1\xa3\n</foo>\n'
  711. + utf8 = b"<?xml version='1.0' encoding='utf-8'?>\n<foo>\n\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafEUC-JP\xe3\x81\xa7\xe3\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n</foo>\n"
  712. soup = BeautifulStoneSoup(euc_jp)
  713. if soup.originalEncoding != "euc-jp":
  714. raise Exception("Test failed when parsing euc-jp document. "
  715. @@ -718,12 +718,12 @@
  716. self.assertEquals(soup.originalEncoding, "euc-jp")
  717. self.assertEquals(soup.renderContents('utf-8'), utf8)
  718. - old_text = "<?xml encoding='windows-1252'><foo>\x92</foo>"
  719. + old_text = b"<?xml encoding='windows-1252'><foo>\x92</foo>"
  720. new_text = "<?xml version='1.0' encoding='utf-8'?><foo>&rsquo;</foo>"
  721. self.assertSoupEquals(old_text, new_text)
  722. def testRewrittenMetaTag(self):
  723. - no_shift_jis_html = '''<html><head>\n<meta http-equiv="Content-language" content="ja" /></head><body><pre>\n\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n</pre></body></html>'''
  724. + no_shift_jis_html = b'''<html><head>\n<meta http-equiv="Content-language" content="ja" /></head><body><pre>\n\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n</pre></body></html>'''
  725. soup = BeautifulSoup(no_shift_jis_html)
  726. # Beautiful Soup used to try to rewrite the meta tag even if the
  727. @@ -733,16 +733,16 @@
  728. soup = BeautifulSoup(no_shift_jis_html, parseOnlyThese=strainer)
  729. self.assertEquals(soup.contents[0].name, 'pre')
  730. - meta_tag = ('<meta content="text/html; charset=x-sjis" '
  731. - 'http-equiv="Content-type" />')
  732. + meta_tag = (b'<meta content="text/html; charset=x-sjis" '
  733. + b'http-equiv="Content-type" />')
  734. shift_jis_html = (
  735. - '<html><head>\n%s\n'
  736. - '<meta http-equiv="Content-language" content="ja" />'
  737. - '</head><body><pre>\n'
  738. - '\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f'
  739. - '\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c'
  740. - '\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n'
  741. - '</pre></body></html>') % meta_tag
  742. + b'<html><head>\n' + meta_tag + b'\n'
  743. + b'<meta http-equiv="Content-language" content="ja" />'
  744. + b'</head><body><pre>\n'
  745. + b'\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f'
  746. + b'\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c'
  747. + b'\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n'
  748. + b'</pre></body></html>')
  749. soup = BeautifulSoup(shift_jis_html)
  750. if soup.originalEncoding != "shift-jis":
  751. raise Exception("Test failed when parsing shift-jis document "
  752. @@ -755,59 +755,59 @@
  753. content_type_tag = soup.meta['content']
  754. self.assertEquals(content_type_tag[content_type_tag.find('charset='):],
  755. 'charset=%SOUP-ENCODING%')
  756. - content_type = str(soup.meta)
  757. + content_type = soup.meta.decode()
  758. index = content_type.find('charset=')
  759. self.assertEqual(content_type[index:index+len('charset=utf8')+1],
  760. 'charset=utf-8')
  761. content_type = soup.meta.encode('shift-jis')
  762. - index = content_type.find('charset=')
  763. + index = content_type.find(b'charset=')
  764. self.assertEqual(content_type[index:index+len('charset=shift-jis')],
  765. 'charset=shift-jis'.encode())
  766. self.assertEquals(soup.encode('utf-8'), (
  767. - '<html><head>\n'
  768. - '<meta content="text/html; charset=utf-8" '
  769. - 'http-equiv="Content-type" />\n'
  770. - '<meta http-equiv="Content-language" content="ja" />'
  771. - '</head><body><pre>\n'
  772. - '\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafShift-JIS\xe3\x81\xa7\xe3'
  773. - '\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3'
  774. - '\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6'
  775. - '\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3'
  776. - '\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n'
  777. - '</pre></body></html>'))
  778. + b'<html><head>\n'
  779. + b'<meta content="text/html; charset=utf-8" '
  780. + b'http-equiv="Content-type" />\n'
  781. + b'<meta http-equiv="Content-language" content="ja" />'
  782. + b'</head><body><pre>\n'
  783. + b'\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafShift-JIS\xe3\x81\xa7\xe3'
  784. + b'\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3'
  785. + b'\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6'
  786. + b'\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3'
  787. + b'\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n'
  788. + b'</pre></body></html>'))
  789. self.assertEquals(soup.encode("shift-jis"),
  790. shift_jis_html.replace('x-sjis'.encode(),
  791. 'shift-jis'.encode()))
  792. - isolatin = """<html><meta http-equiv="Content-type" content="text/html; charset=ISO-Latin-1" />Sacr\xe9 bleu!</html>"""
  793. + isolatin = b"""<html><meta http-equiv="Content-type" content="text/html; charset=ISO-Latin-1" />Sacr\xe9 bleu!</html>"""
  794. soup = BeautifulSoup(isolatin)
  795. utf8 = isolatin.replace("ISO-Latin-1".encode(), "utf-8".encode())
  796. - utf8 = utf8.replace("\xe9", "\xc3\xa9")
  797. + utf8 = utf8.replace(b"\xe9", b"\xc3\xa9")
  798. self.assertSoupEquals(soup.encode("utf-8"), utf8, encoding='utf-8')
  799. def testHebrew(self):
  800. - iso_8859_8= '<HEAD>\n<TITLE>Hebrew (ISO 8859-8) in Visual Directionality</TITLE>\n\n\n\n</HEAD>\n<BODY>\n<H1>Hebrew (ISO 8859-8) in Visual Directionality</H1>\n\xed\xe5\xec\xf9\n</BODY>\n'
  801. - utf8 = '<head>\n<title>Hebrew (ISO 8859-8) in Visual Directionality</title>\n</head>\n<body>\n<h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\n\xd7\x9d\xd7\x95\xd7\x9c\xd7\xa9\n</body>\n'
  802. + iso_8859_8= b'<HEAD>\n<TITLE>Hebrew (ISO 8859-8) in Visual Directionality</TITLE>\n\n\n\n</HEAD>\n<BODY>\n<H1>Hebrew (ISO 8859-8) in Visual Directionality</H1>\n\xed\xe5\xec\xf9\n</BODY>\n'
  803. + utf8 = b'<head>\n<title>Hebrew (ISO 8859-8) in Visual Directionality</title>\n</head>\n<body>\n<h1>Hebrew (ISO 8859-8) in Visual Directionality</h1>\n\xd7\x9d\xd7\x95\xd7\x9c\xd7\xa9\n</body>\n'
  804. soup = BeautifulStoneSoup(iso_8859_8, fromEncoding="iso-8859-8")
  805. self.assertEquals(soup.encode('utf-8'), utf8)
  806. def testSmartQuotesNotSoSmartAnymore(self):
  807. - self.assertSoupEquals("\x91Foo\x92 <!--blah-->",
  808. + self.assertSoupEquals(b"\x91Foo\x92 <!--blah-->",
  809. '&lsquo;Foo&rsquo; <!--blah-->')
  810. def testDontConvertSmartQuotesWhenAlsoConvertingEntities(self):
  811. - smartQuotes = "Il a dit, \x8BSacr&eacute; bl&#101;u!\x9b"
  812. + smartQuotes = b"Il a dit, \x8BSacr&eacute; bl&#101;u!\x9b"
  813. soup = BeautifulSoup(smartQuotes)
  814. self.assertEquals(soup.decode(),
  815. 'Il a dit, &lsaquo;Sacr&eacute; bl&#101;u!&rsaquo;')
  816. soup = BeautifulSoup(smartQuotes, convertEntities="html")
  817. self.assertEquals(soup.encode('utf-8'),
  818. - 'Il a dit, \xe2\x80\xb9Sacr\xc3\xa9 bleu!\xe2\x80\xba')
  819. + b'Il a dit, \xe2\x80\xb9Sacr\xc3\xa9 bleu!\xe2\x80\xba')
  820. def testDontSeeSmartQuotesWhereThereAreNone(self):
  821. - utf_8 = "\343\202\261\343\203\274\343\202\277\343\202\244 Watch"
  822. + utf_8 = b"\343\202\261\343\203\274\343\202\277\343\202\244 Watch"
  823. self.assertSoupEquals(utf_8, encoding='utf-8')
  824. --- setup.py
  825. +++ setup.py
  826. @@ -19,19 +19,19 @@
  827. suite = loader.loadTestsFromModule(BeautifulSoupTests)
  828. suite.run(result)
  829. if not result.wasSuccessful():
  830. - print "Unit tests have failed!"
  831. + print("Unit tests have failed!")
  832. for l in result.errors, result.failures:
  833. for case, error in l:
  834. - print "-" * 80
  835. + print("-" * 80)
  836. desc = case.shortDescription()
  837. if desc:
  838. - print desc
  839. - print error
  840. - print '''If you see an error like: "'ascii' codec can't encode character...", see\nthe Beautiful Soup documentation:\n http://www.crummy.com/software/BeautifulSoup/documentation.html#Why%20can't%20Beautiful%20Soup%20print%20out%20the%20non-ASCII%20characters%20I%20gave%20it?'''
  841. - print "This might or might not be a problem depending on what you plan to do with\nBeautiful Soup."
  842. + print(desc)
  843. + print(error)
  844. + print('''If you see an error like: "'ascii' codec can't encode character...", see\nthe Beautiful Soup documentation:\n http://www.crummy.com/software/BeautifulSoup/documentation.html#Why%20can't%20Beautiful%20Soup%20print%20out%20the%20non-ASCII%20characters%20I%20gave%20it?''')
  845. + print("This might or might not be a problem depending on what you plan to do with\nBeautiful Soup.")
  846. if sys.argv[1] == 'sdist':
  847. - print
  848. - print "I'm not going to make a source distribution since the tests don't pass."
  849. + print()
  850. + print("I'm not going to make a source distribution since the tests don't pass.")
  851. sys.exit(1)
  852. setup(name="BeautifulSoup",