',
convertEntities=BeautifulStoneSoup.HTML_ENTITIES)
- self.assertEquals(soup.decode(), u'')
+ self.assertEquals(soup.decode(), '')
uri = "http://crummy.com?sacré&bleu"
link = '' % uri
soup = BeautifulSoup(link, convertEntities=BeautifulSoup.HTML_ENTITIES)
self.assertEquals(soup.decode(),
- link.replace("é", u"\xe9"))
+ link.replace("é", "\xe9"))
uri = "http://crummy.com?sacré&bleu"
link = '' % uri
soup = BeautifulSoup(link, convertEntities=BeautifulSoup.HTML_ENTITIES)
self.assertEquals(soup.a['href'],
- uri.replace("é", u"\xe9"))
+ uri.replace("é", "\xe9"))
def testNakedAmpersands(self):
html = {'convertEntities':BeautifulStoneSoup.HTML_ENTITIES}
@@ -663,13 +663,13 @@
smart quote fixes."""
def testUnicodeDammitStandalone(self):
- markup = "\x92"
+ markup = b"\x92"
dammit = UnicodeDammit(markup)
- self.assertEquals(dammit.unicode, "’")
+ self.assertEquals(dammit.str, "’")
- hebrew = "\xed\xe5\xec\xf9"
+ hebrew = b"\xed\xe5\xec\xf9"
dammit = UnicodeDammit(hebrew, ["iso-8859-8"])
- self.assertEquals(dammit.unicode, u'\u05dd\u05d5\u05dc\u05e9')
+ self.assertEquals(dammit.str, '\u05dd\u05d5\u05dc\u05e9')
self.assertEquals(dammit.originalEncoding, 'iso-8859-8')
def testGarbageInGarbageOut(self):
@@ -677,13 +677,13 @@
asciiSoup = BeautifulStoneSoup(ascii)
self.assertEquals(ascii, asciiSoup.decode())
- unicodeData = u"\u00FC"
+ unicodeData = "\u00FC"
utf8 = unicodeData.encode("utf-8")
- self.assertEquals(utf8, '\xc3\xbc')
+ self.assertEquals(utf8, b'\xc3\xbc')
unicodeSoup = BeautifulStoneSoup(unicodeData)
self.assertEquals(unicodeData, unicodeSoup.decode())
- self.assertEquals(unicodeSoup.foo.string, u'\u00FC')
+ self.assertEquals(unicodeSoup.foo.string, '\u00FC')
utf8Soup = BeautifulStoneSoup(utf8, fromEncoding='utf-8')
self.assertEquals(utf8, utf8Soup.encode('utf-8'))
@@ -696,18 +696,18 @@
def testHandleInvalidCodec(self):
for bad_encoding in ['.utf8', '...', 'utF---16.!']:
- soup = BeautifulSoup(u"Räksmörgås".encode("utf-8"),
+ soup = BeautifulSoup("Räksmörgås".encode("utf-8"),
fromEncoding=bad_encoding)
self.assertEquals(soup.originalEncoding, 'utf-8')
def testUnicodeSearch(self):
- html = u'Räksmörgås
'
+ html = 'Räksmörgås
'
soup = BeautifulSoup(html)
- self.assertEqual(soup.find(text=u'Räksmörgås'),u'Räksmörgås')
+ self.assertEqual(soup.find(text='Räksmörgås'),'Räksmörgås')
def testRewrittenXMLHeader(self):
- euc_jp = '\n\n\xa4\xb3\xa4\xec\xa4\xcfEUC-JP\xa4\xc7\xa5\xb3\xa1\xbc\xa5\xc7\xa5\xa3\xa5\xf3\xa5\xb0\xa4\xb5\xa4\xec\xa4\xbf\xc6\xfc\xcb\xdc\xb8\xec\xa4\xce\xa5\xd5\xa5\xa1\xa5\xa4\xa5\xeb\xa4\xc7\xa4\xb9\xa1\xa3\n\n'
- utf8 = "\n\n\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafEUC-JP\xe3\x81\xa7\xe3\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n\n"
+ euc_jp = b'\n\n\xa4\xb3\xa4\xec\xa4\xcfEUC-JP\xa4\xc7\xa5\xb3\xa1\xbc\xa5\xc7\xa5\xa3\xa5\xf3\xa5\xb0\xa4\xb5\xa4\xec\xa4\xbf\xc6\xfc\xcb\xdc\xb8\xec\xa4\xce\xa5\xd5\xa5\xa1\xa5\xa4\xa5\xeb\xa4\xc7\xa4\xb9\xa1\xa3\n\n'
+ utf8 = b"\n\n\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafEUC-JP\xe3\x81\xa7\xe3\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n\n"
soup = BeautifulStoneSoup(euc_jp)
if soup.originalEncoding != "euc-jp":
raise Exception("Test failed when parsing euc-jp document. "
@@ -718,12 +718,12 @@
self.assertEquals(soup.originalEncoding, "euc-jp")
self.assertEquals(soup.renderContents('utf-8'), utf8)
- old_text = "\x92"
+ old_text = b"\x92"
new_text = "’"
self.assertSoupEquals(old_text, new_text)
def testRewrittenMetaTag(self):
- no_shift_jis_html = '''\n\n\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n'''
+ no_shift_jis_html = b'''\n\n\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n'''
soup = BeautifulSoup(no_shift_jis_html)
# Beautiful Soup used to try to rewrite the meta tag even if the
@@ -733,16 +733,16 @@
soup = BeautifulSoup(no_shift_jis_html, parseOnlyThese=strainer)
self.assertEquals(soup.contents[0].name, 'pre')
- meta_tag = ('')
+ meta_tag = (b'')
shift_jis_html = (
- '\n%s\n'
- ''
- '\n'
- '\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f'
- '\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c'
- '\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n'
- '') % meta_tag
+ b'\n' + meta_tag + b'\n'
+ b''
+ b'\n'
+ b'\x82\xb1\x82\xea\x82\xcdShift-JIS\x82\xc5\x83R\x81[\x83f'
+ b'\x83B\x83\x93\x83O\x82\xb3\x82\xea\x82\xbd\x93\xfa\x96{\x8c'
+ b'\xea\x82\xcc\x83t\x83@\x83C\x83\x8b\x82\xc5\x82\xb7\x81B\n'
+ b'')
soup = BeautifulSoup(shift_jis_html)
if soup.originalEncoding != "shift-jis":
raise Exception("Test failed when parsing shift-jis document "
@@ -755,59 +755,59 @@
content_type_tag = soup.meta['content']
self.assertEquals(content_type_tag[content_type_tag.find('charset='):],
'charset=%SOUP-ENCODING%')
- content_type = str(soup.meta)
+ content_type = soup.meta.decode()
index = content_type.find('charset=')
self.assertEqual(content_type[index:index+len('charset=utf8')+1],
'charset=utf-8')
content_type = soup.meta.encode('shift-jis')
- index = content_type.find('charset=')
+ index = content_type.find(b'charset=')
self.assertEqual(content_type[index:index+len('charset=shift-jis')],
'charset=shift-jis'.encode())
self.assertEquals(soup.encode('utf-8'), (
- '\n'
- '\n'
- ''
- '\n'
- '\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafShift-JIS\xe3\x81\xa7\xe3'
- '\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3'
- '\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6'
- '\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3'
- '\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n'
- '
'))
+ b'\n'
+ b'\n'
+ b''
+ b'\n'
+ b'\xe3\x81\x93\xe3\x82\x8c\xe3\x81\xafShift-JIS\xe3\x81\xa7\xe3'
+ b'\x82\xb3\xe3\x83\xbc\xe3\x83\x87\xe3\x82\xa3\xe3\x83\xb3\xe3'
+ b'\x82\xb0\xe3\x81\x95\xe3\x82\x8c\xe3\x81\x9f\xe6\x97\xa5\xe6'
+ b'\x9c\xac\xe8\xaa\x9e\xe3\x81\xae\xe3\x83\x95\xe3\x82\xa1\xe3'
+ b'\x82\xa4\xe3\x83\xab\xe3\x81\xa7\xe3\x81\x99\xe3\x80\x82\n'
+ b'
'))
self.assertEquals(soup.encode("shift-jis"),
shift_jis_html.replace('x-sjis'.encode(),
'shift-jis'.encode()))
- isolatin = """Sacr\xe9 bleu!"""
+ isolatin = b"""Sacr\xe9 bleu!"""
soup = BeautifulSoup(isolatin)
utf8 = isolatin.replace("ISO-Latin-1".encode(), "utf-8".encode())
- utf8 = utf8.replace("\xe9", "\xc3\xa9")
+ utf8 = utf8.replace(b"\xe9", b"\xc3\xa9")
self.assertSoupEquals(soup.encode("utf-8"), utf8, encoding='utf-8')
def testHebrew(self):
- iso_8859_8= '\nHebrew (ISO 8859-8) in Visual Directionality\n\n\n\n\n\nHebrew (ISO 8859-8) in Visual Directionality
\n\xed\xe5\xec\xf9\n\n'
- utf8 = '\nHebrew (ISO 8859-8) in Visual Directionality\n\n\nHebrew (ISO 8859-8) in Visual Directionality
\n\xd7\x9d\xd7\x95\xd7\x9c\xd7\xa9\n\n'
+ iso_8859_8= b'\nHebrew (ISO 8859-8) in Visual Directionality\n\n\n\n\n\nHebrew (ISO 8859-8) in Visual Directionality
\n\xed\xe5\xec\xf9\n\n'
+ utf8 = b'\nHebrew (ISO 8859-8) in Visual Directionality\n\n\nHebrew (ISO 8859-8) in Visual Directionality
\n\xd7\x9d\xd7\x95\xd7\x9c\xd7\xa9\n\n'
soup = BeautifulStoneSoup(iso_8859_8, fromEncoding="iso-8859-8")
self.assertEquals(soup.encode('utf-8'), utf8)
def testSmartQuotesNotSoSmartAnymore(self):
- self.assertSoupEquals("\x91Foo\x92 ",
+ self.assertSoupEquals(b"\x91Foo\x92 ",
'‘Foo’ ')
def testDontConvertSmartQuotesWhenAlsoConvertingEntities(self):
- smartQuotes = "Il a dit, \x8BSacré bleu!\x9b"
+ smartQuotes = b"Il a dit, \x8BSacré bleu!\x9b"
soup = BeautifulSoup(smartQuotes)
self.assertEquals(soup.decode(),
'Il a dit, ‹Sacré bleu!›')
soup = BeautifulSoup(smartQuotes, convertEntities="html")
self.assertEquals(soup.encode('utf-8'),
- 'Il a dit, \xe2\x80\xb9Sacr\xc3\xa9 bleu!\xe2\x80\xba')
+ b'Il a dit, \xe2\x80\xb9Sacr\xc3\xa9 bleu!\xe2\x80\xba')
def testDontSeeSmartQuotesWhereThereAreNone(self):
- utf_8 = "\343\202\261\343\203\274\343\202\277\343\202\244 Watch"
+ utf_8 = b"\343\202\261\343\203\274\343\202\277\343\202\244 Watch"
self.assertSoupEquals(utf_8, encoding='utf-8')
--- setup.py
+++ setup.py
@@ -19,19 +19,19 @@
suite = loader.loadTestsFromModule(BeautifulSoupTests)
suite.run(result)
if not result.wasSuccessful():
- print "Unit tests have failed!"
+ print("Unit tests have failed!")
for l in result.errors, result.failures:
for case, error in l:
- print "-" * 80
+ print("-" * 80)
desc = case.shortDescription()
if desc:
- print desc
- print error
- print '''If you see an error like: "'ascii' codec can't encode character...", see\nthe Beautiful Soup documentation:\n http://www.crummy.com/software/BeautifulSoup/documentation.html#Why%20can't%20Beautiful%20Soup%20print%20out%20the%20non-ASCII%20characters%20I%20gave%20it?'''
- print "This might or might not be a problem depending on what you plan to do with\nBeautiful Soup."
+ print(desc)
+ print(error)
+ print('''If you see an error like: "'ascii' codec can't encode character...", see\nthe Beautiful Soup documentation:\n http://www.crummy.com/software/BeautifulSoup/documentation.html#Why%20can't%20Beautiful%20Soup%20print%20out%20the%20non-ASCII%20characters%20I%20gave%20it?''')
+ print("This might or might not be a problem depending on what you plan to do with\nBeautiful Soup.")
if sys.argv[1] == 'sdist':
- print
- print "I'm not going to make a source distribution since the tests don't pass."
+ print()
+ print("I'm not going to make a source distribution since the tests don't pass.")
sys.exit(1)
setup(name="BeautifulSoup",